1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=armeb-unknown | FileCheck %s
3; RUN: llc < %s -mtriple=armv6eb-unknown | FileCheck %s --check-prefix=CHECK-ARMv6
4; RUN: llc < %s -mtriple=thumbv6meb-none-eabi | FileCheck %s --check-prefix=CHECK-THUMBv6
5; RUN: llc < %s -mtriple=thumbv6meb-none-eabi | FileCheck %s --check-prefix=CHECK-THUMBv7
6
7; i8* p; // p is 4 byte aligned
8; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
9define i32 @load_i32_by_i8_big_endian(i32* %arg) {
10; CHECK-LABEL: load_i32_by_i8_big_endian:
11; CHECK:       @ %bb.0:
12; CHECK-NEXT:    ldr r0, [r0]
13; CHECK-NEXT:    mov pc, lr
14;
15; CHECK-ARMv6-LABEL: load_i32_by_i8_big_endian:
16; CHECK-ARMv6:       @ %bb.0:
17; CHECK-ARMv6-NEXT:    ldr r0, [r0]
18; CHECK-ARMv6-NEXT:    bx lr
19;
20; CHECK-THUMBv6-LABEL: load_i32_by_i8_big_endian:
21; CHECK-THUMBv6:       @ %bb.0:
22; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
23; CHECK-THUMBv6-NEXT:    bx lr
24;
25; CHECK-THUMBv7-LABEL: load_i32_by_i8_big_endian:
26; CHECK-THUMBv7:       @ %bb.0:
27; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
28; CHECK-THUMBv7-NEXT:    bx lr
29
30  %tmp = bitcast i32* %arg to i8*
31  %tmp1 = load i8, i8* %tmp, align 4
32  %tmp2 = zext i8 %tmp1 to i32
33  %tmp3 = shl nuw nsw i32 %tmp2, 24
34  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
35  %tmp5 = load i8, i8* %tmp4, align 1
36  %tmp6 = zext i8 %tmp5 to i32
37  %tmp7 = shl nuw nsw i32 %tmp6, 16
38  %tmp8 = or i32 %tmp7, %tmp3
39  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
40  %tmp10 = load i8, i8* %tmp9, align 1
41  %tmp11 = zext i8 %tmp10 to i32
42  %tmp12 = shl nuw nsw i32 %tmp11, 8
43  %tmp13 = or i32 %tmp8, %tmp12
44  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
45  %tmp15 = load i8, i8* %tmp14, align 1
46  %tmp16 = zext i8 %tmp15 to i32
47  %tmp17 = or i32 %tmp13, %tmp16
48  ret i32 %tmp17
49}
50
51; i8* p; // p is 4 byte aligned
52; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
53define i32 @load_i32_by_i8_bswap(i32* %arg) {
54; BSWAP is not supported by 32 bit target
55; CHECK-LABEL: load_i32_by_i8_bswap:
56; CHECK:       @ %bb.0:
57; CHECK-NEXT:    ldr r0, [r0]
58; CHECK-NEXT:    mov r1, #65280
59; CHECK-NEXT:    mov r2, #16711680
60; CHECK-NEXT:    and r1, r1, r0, lsr #8
61; CHECK-NEXT:    and r2, r2, r0, lsl #8
62; CHECK-NEXT:    orr r1, r1, r0, lsr #24
63; CHECK-NEXT:    orr r0, r2, r0, lsl #24
64; CHECK-NEXT:    orr r0, r0, r1
65; CHECK-NEXT:    mov pc, lr
66;
67; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
68; CHECK-ARMv6:       @ %bb.0:
69; CHECK-ARMv6-NEXT:    ldr r0, [r0]
70; CHECK-ARMv6-NEXT:    rev r0, r0
71; CHECK-ARMv6-NEXT:    bx lr
72;
73; CHECK-THUMBv6-LABEL: load_i32_by_i8_bswap:
74; CHECK-THUMBv6:       @ %bb.0:
75; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
76; CHECK-THUMBv6-NEXT:    rev r0, r0
77; CHECK-THUMBv6-NEXT:    bx lr
78;
79; CHECK-THUMBv7-LABEL: load_i32_by_i8_bswap:
80; CHECK-THUMBv7:       @ %bb.0:
81; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
82; CHECK-THUMBv7-NEXT:    rev r0, r0
83; CHECK-THUMBv7-NEXT:    bx lr
84
85  %tmp = bitcast i32* %arg to i8*
86  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
87  %tmp2 = load i8, i8* %tmp1, align 4
88  %tmp3 = zext i8 %tmp2 to i32
89  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
90  %tmp5 = load i8, i8* %tmp4, align 1
91  %tmp6 = zext i8 %tmp5 to i32
92  %tmp7 = shl nuw nsw i32 %tmp6, 8
93  %tmp8 = or i32 %tmp7, %tmp3
94  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
95  %tmp10 = load i8, i8* %tmp9, align 1
96  %tmp11 = zext i8 %tmp10 to i32
97  %tmp12 = shl nuw nsw i32 %tmp11, 16
98  %tmp13 = or i32 %tmp8, %tmp12
99  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
100  %tmp15 = load i8, i8* %tmp14, align 1
101  %tmp16 = zext i8 %tmp15 to i32
102  %tmp17 = shl nuw nsw i32 %tmp16, 24
103  %tmp18 = or i32 %tmp13, %tmp17
104  ret i32 %tmp18
105}
106
107; i8* p; // p is 4 byte aligned
108; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[3] << 8) | (i16) p[4])
109define i32 @load_i32_by_i16_by_i8_big_endian(i32* %arg) {
110; CHECK-LABEL: load_i32_by_i16_by_i8_big_endian:
111; CHECK:       @ %bb.0:
112; CHECK-NEXT:    ldr r0, [r0]
113; CHECK-NEXT:    mov pc, lr
114;
115; CHECK-ARMv6-LABEL: load_i32_by_i16_by_i8_big_endian:
116; CHECK-ARMv6:       @ %bb.0:
117; CHECK-ARMv6-NEXT:    ldr r0, [r0]
118; CHECK-ARMv6-NEXT:    bx lr
119;
120; CHECK-THUMBv6-LABEL: load_i32_by_i16_by_i8_big_endian:
121; CHECK-THUMBv6:       @ %bb.0:
122; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
123; CHECK-THUMBv6-NEXT:    bx lr
124;
125; CHECK-THUMBv7-LABEL: load_i32_by_i16_by_i8_big_endian:
126; CHECK-THUMBv7:       @ %bb.0:
127; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
128; CHECK-THUMBv7-NEXT:    bx lr
129
130  %tmp = bitcast i32* %arg to i8*
131  %tmp1 = load i8, i8* %tmp, align 4
132  %tmp2 = zext i8 %tmp1 to i16
133  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
134  %tmp4 = load i8, i8* %tmp3, align 1
135  %tmp5 = zext i8 %tmp4 to i16
136  %tmp6 = shl nuw nsw i16 %tmp2, 8
137  %tmp7 = or i16 %tmp6, %tmp5
138  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
139  %tmp9 = load i8, i8* %tmp8, align 1
140  %tmp10 = zext i8 %tmp9 to i16
141  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
142  %tmp12 = load i8, i8* %tmp11, align 1
143  %tmp13 = zext i8 %tmp12 to i16
144  %tmp14 = shl nuw nsw i16 %tmp10, 8
145  %tmp15 = or i16 %tmp14, %tmp13
146  %tmp16 = zext i16 %tmp7 to i32
147  %tmp17 = zext i16 %tmp15 to i32
148  %tmp18 = shl nuw nsw i32 %tmp16, 16
149  %tmp19 = or i32 %tmp18, %tmp17
150  ret i32 %tmp19
151}
152
153; i16* p; // p is 4 byte aligned
154; ((i32) p[0] << 16) | (i32) p[1]
155define i32 @load_i32_by_i16(i32* %arg) {
156; CHECK-LABEL: load_i32_by_i16:
157; CHECK:       @ %bb.0:
158; CHECK-NEXT:    ldr r0, [r0]
159; CHECK-NEXT:    mov pc, lr
160;
161; CHECK-ARMv6-LABEL: load_i32_by_i16:
162; CHECK-ARMv6:       @ %bb.0:
163; CHECK-ARMv6-NEXT:    ldr r0, [r0]
164; CHECK-ARMv6-NEXT:    bx lr
165;
166; CHECK-THUMBv6-LABEL: load_i32_by_i16:
167; CHECK-THUMBv6:       @ %bb.0:
168; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
169; CHECK-THUMBv6-NEXT:    bx lr
170;
171; CHECK-THUMBv7-LABEL: load_i32_by_i16:
172; CHECK-THUMBv7:       @ %bb.0:
173; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
174; CHECK-THUMBv7-NEXT:    bx lr
175
176  %tmp = bitcast i32* %arg to i16*
177  %tmp1 = load i16, i16* %tmp, align 4
178  %tmp2 = zext i16 %tmp1 to i32
179  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
180  %tmp4 = load i16, i16* %tmp3, align 1
181  %tmp5 = zext i16 %tmp4 to i32
182  %tmp6 = shl nuw nsw i32 %tmp2, 16
183  %tmp7 = or i32 %tmp6, %tmp5
184  ret i32 %tmp7
185}
186
187; i16* p_16; // p_16 is 4 byte aligned
188; i8* p_8 = (i8*) p_16;
189; (i32) (p_16[0] << 16) | ((i32) p[2] << 8) | (i32) p[3]
190define i32 @load_i32_by_i16_i8(i32* %arg) {
191; CHECK-LABEL: load_i32_by_i16_i8:
192; CHECK:       @ %bb.0:
193; CHECK-NEXT:    ldr r0, [r0]
194; CHECK-NEXT:    mov pc, lr
195;
196; CHECK-ARMv6-LABEL: load_i32_by_i16_i8:
197; CHECK-ARMv6:       @ %bb.0:
198; CHECK-ARMv6-NEXT:    ldr r0, [r0]
199; CHECK-ARMv6-NEXT:    bx lr
200;
201; CHECK-THUMBv6-LABEL: load_i32_by_i16_i8:
202; CHECK-THUMBv6:       @ %bb.0:
203; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
204; CHECK-THUMBv6-NEXT:    bx lr
205;
206; CHECK-THUMBv7-LABEL: load_i32_by_i16_i8:
207; CHECK-THUMBv7:       @ %bb.0:
208; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
209; CHECK-THUMBv7-NEXT:    bx lr
210
211  %tmp = bitcast i32* %arg to i16*
212  %tmp1 = bitcast i32* %arg to i8*
213  %tmp2 = load i16, i16* %tmp, align 4
214  %tmp3 = zext i16 %tmp2 to i32
215  %tmp4 = shl nuw nsw i32 %tmp3, 16
216  %tmp5 = getelementptr inbounds i8, i8* %tmp1, i32 2
217  %tmp6 = load i8, i8* %tmp5, align 1
218  %tmp7 = zext i8 %tmp6 to i32
219  %tmp8 = shl nuw nsw i32 %tmp7, 8
220  %tmp9 = getelementptr inbounds i8, i8* %tmp1, i32 3
221  %tmp10 = load i8, i8* %tmp9, align 1
222  %tmp11 = zext i8 %tmp10 to i32
223  %tmp12 = or i32 %tmp8, %tmp11
224  %tmp13 = or i32 %tmp12, %tmp4
225  ret i32 %tmp13
226}
227
228; i8* p; // p is 8 byte aligned
229; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
230define i64 @load_i64_by_i8_bswap(i64* %arg) {
231; CHECK-LABEL: load_i64_by_i8_bswap:
232; CHECK:       @ %bb.0:
233; CHECK-NEXT:    push {r11, lr}
234; CHECK-NEXT:    ldr r1, [r0]
235; CHECK-NEXT:    mov r12, #65280
236; CHECK-NEXT:    ldr r0, [r0, #4]
237; CHECK-NEXT:    mov lr, #16711680
238; CHECK-NEXT:    and r3, r12, r0, lsr #8
239; CHECK-NEXT:    and r2, lr, r0, lsl #8
240; CHECK-NEXT:    orr r3, r3, r0, lsr #24
241; CHECK-NEXT:    orr r0, r2, r0, lsl #24
242; CHECK-NEXT:    and r2, r12, r1, lsr #8
243; CHECK-NEXT:    orr r0, r0, r3
244; CHECK-NEXT:    and r3, lr, r1, lsl #8
245; CHECK-NEXT:    orr r2, r2, r1, lsr #24
246; CHECK-NEXT:    orr r1, r3, r1, lsl #24
247; CHECK-NEXT:    orr r1, r1, r2
248; CHECK-NEXT:    pop {r11, lr}
249; CHECK-NEXT:    mov pc, lr
250;
251; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
252; CHECK-ARMv6:       @ %bb.0:
253; CHECK-ARMv6-NEXT:    ldrd r2, r3, [r0]
254; CHECK-ARMv6-NEXT:    rev r0, r3
255; CHECK-ARMv6-NEXT:    rev r1, r2
256; CHECK-ARMv6-NEXT:    bx lr
257;
258; CHECK-THUMBv6-LABEL: load_i64_by_i8_bswap:
259; CHECK-THUMBv6:       @ %bb.0:
260; CHECK-THUMBv6-NEXT:    ldr r1, [r0]
261; CHECK-THUMBv6-NEXT:    ldr r0, [r0, #4]
262; CHECK-THUMBv6-NEXT:    rev r0, r0
263; CHECK-THUMBv6-NEXT:    rev r1, r1
264; CHECK-THUMBv6-NEXT:    bx lr
265;
266; CHECK-THUMBv7-LABEL: load_i64_by_i8_bswap:
267; CHECK-THUMBv7:       @ %bb.0:
268; CHECK-THUMBv7-NEXT:    ldr r1, [r0]
269; CHECK-THUMBv7-NEXT:    ldr r0, [r0, #4]
270; CHECK-THUMBv7-NEXT:    rev r0, r0
271; CHECK-THUMBv7-NEXT:    rev r1, r1
272; CHECK-THUMBv7-NEXT:    bx lr
273
274  %tmp = bitcast i64* %arg to i8*
275  %tmp1 = load i8, i8* %tmp, align 8
276  %tmp2 = zext i8 %tmp1 to i64
277  %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
278  %tmp4 = load i8, i8* %tmp3, align 1
279  %tmp5 = zext i8 %tmp4 to i64
280  %tmp6 = shl nuw nsw i64 %tmp5, 8
281  %tmp7 = or i64 %tmp6, %tmp2
282  %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
283  %tmp9 = load i8, i8* %tmp8, align 1
284  %tmp10 = zext i8 %tmp9 to i64
285  %tmp11 = shl nuw nsw i64 %tmp10, 16
286  %tmp12 = or i64 %tmp7, %tmp11
287  %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
288  %tmp14 = load i8, i8* %tmp13, align 1
289  %tmp15 = zext i8 %tmp14 to i64
290  %tmp16 = shl nuw nsw i64 %tmp15, 24
291  %tmp17 = or i64 %tmp12, %tmp16
292  %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
293  %tmp19 = load i8, i8* %tmp18, align 1
294  %tmp20 = zext i8 %tmp19 to i64
295  %tmp21 = shl nuw nsw i64 %tmp20, 32
296  %tmp22 = or i64 %tmp17, %tmp21
297  %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
298  %tmp24 = load i8, i8* %tmp23, align 1
299  %tmp25 = zext i8 %tmp24 to i64
300  %tmp26 = shl nuw nsw i64 %tmp25, 40
301  %tmp27 = or i64 %tmp22, %tmp26
302  %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
303  %tmp29 = load i8, i8* %tmp28, align 1
304  %tmp30 = zext i8 %tmp29 to i64
305  %tmp31 = shl nuw nsw i64 %tmp30, 48
306  %tmp32 = or i64 %tmp27, %tmp31
307  %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
308  %tmp34 = load i8, i8* %tmp33, align 1
309  %tmp35 = zext i8 %tmp34 to i64
310  %tmp36 = shl nuw i64 %tmp35, 56
311  %tmp37 = or i64 %tmp32, %tmp36
312  ret i64 %tmp37
313}
314
315; i8* p; // p is 8 byte aligned
316; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
317define i64 @load_i64_by_i8(i64* %arg) {
318; CHECK-LABEL: load_i64_by_i8:
319; CHECK:       @ %bb.0:
320; CHECK-NEXT:    ldr r2, [r0]
321; CHECK-NEXT:    ldr r1, [r0, #4]
322; CHECK-NEXT:    mov r0, r2
323; CHECK-NEXT:    mov pc, lr
324;
325; CHECK-ARMv6-LABEL: load_i64_by_i8:
326; CHECK-ARMv6:       @ %bb.0:
327; CHECK-ARMv6-NEXT:    ldrd r0, r1, [r0]
328; CHECK-ARMv6-NEXT:    bx lr
329;
330; CHECK-THUMBv6-LABEL: load_i64_by_i8:
331; CHECK-THUMBv6:       @ %bb.0:
332; CHECK-THUMBv6-NEXT:    ldr r2, [r0]
333; CHECK-THUMBv6-NEXT:    ldr r1, [r0, #4]
334; CHECK-THUMBv6-NEXT:    mov r0, r2
335; CHECK-THUMBv6-NEXT:    bx lr
336;
337; CHECK-THUMBv7-LABEL: load_i64_by_i8:
338; CHECK-THUMBv7:       @ %bb.0:
339; CHECK-THUMBv7-NEXT:    ldr r2, [r0]
340; CHECK-THUMBv7-NEXT:    ldr r1, [r0, #4]
341; CHECK-THUMBv7-NEXT:    mov r0, r2
342; CHECK-THUMBv7-NEXT:    bx lr
343
344  %tmp = bitcast i64* %arg to i8*
345  %tmp1 = load i8, i8* %tmp, align 8
346  %tmp2 = zext i8 %tmp1 to i64
347  %tmp3 = shl nuw i64 %tmp2, 56
348  %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
349  %tmp5 = load i8, i8* %tmp4, align 1
350  %tmp6 = zext i8 %tmp5 to i64
351  %tmp7 = shl nuw nsw i64 %tmp6, 48
352  %tmp8 = or i64 %tmp7, %tmp3
353  %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
354  %tmp10 = load i8, i8* %tmp9, align 1
355  %tmp11 = zext i8 %tmp10 to i64
356  %tmp12 = shl nuw nsw i64 %tmp11, 40
357  %tmp13 = or i64 %tmp8, %tmp12
358  %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
359  %tmp15 = load i8, i8* %tmp14, align 1
360  %tmp16 = zext i8 %tmp15 to i64
361  %tmp17 = shl nuw nsw i64 %tmp16, 32
362  %tmp18 = or i64 %tmp13, %tmp17
363  %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
364  %tmp20 = load i8, i8* %tmp19, align 1
365  %tmp21 = zext i8 %tmp20 to i64
366  %tmp22 = shl nuw nsw i64 %tmp21, 24
367  %tmp23 = or i64 %tmp18, %tmp22
368  %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
369  %tmp25 = load i8, i8* %tmp24, align 1
370  %tmp26 = zext i8 %tmp25 to i64
371  %tmp27 = shl nuw nsw i64 %tmp26, 16
372  %tmp28 = or i64 %tmp23, %tmp27
373  %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
374  %tmp30 = load i8, i8* %tmp29, align 1
375  %tmp31 = zext i8 %tmp30 to i64
376  %tmp32 = shl nuw nsw i64 %tmp31, 8
377  %tmp33 = or i64 %tmp28, %tmp32
378  %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
379  %tmp35 = load i8, i8* %tmp34, align 1
380  %tmp36 = zext i8 %tmp35 to i64
381  %tmp37 = or i64 %tmp33, %tmp36
382  ret i64 %tmp37
383}
384
385; i8* p; // p[1] is 4 byte aligned
386; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
387define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
388; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
389; CHECK:       @ %bb.0:
390; CHECK-NEXT:    ldr r0, [r0, #1]
391; CHECK-NEXT:    mov r1, #65280
392; CHECK-NEXT:    mov r2, #16711680
393; CHECK-NEXT:    and r1, r1, r0, lsr #8
394; CHECK-NEXT:    and r2, r2, r0, lsl #8
395; CHECK-NEXT:    orr r1, r1, r0, lsr #24
396; CHECK-NEXT:    orr r0, r2, r0, lsl #24
397; CHECK-NEXT:    orr r0, r0, r1
398; CHECK-NEXT:    mov pc, lr
399;
400; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
401; CHECK-ARMv6:       @ %bb.0:
402; CHECK-ARMv6-NEXT:    ldr r0, [r0, #1]
403; CHECK-ARMv6-NEXT:    rev r0, r0
404; CHECK-ARMv6-NEXT:    bx lr
405;
406; CHECK-THUMBv6-LABEL: load_i32_by_i8_nonzero_offset:
407; CHECK-THUMBv6:       @ %bb.0:
408; CHECK-THUMBv6-NEXT:    movs r1, #1
409; CHECK-THUMBv6-NEXT:    ldr r0, [r0, r1]
410; CHECK-THUMBv6-NEXT:    rev r0, r0
411; CHECK-THUMBv6-NEXT:    bx lr
412;
413; CHECK-THUMBv7-LABEL: load_i32_by_i8_nonzero_offset:
414; CHECK-THUMBv7:       @ %bb.0:
415; CHECK-THUMBv7-NEXT:    movs r1, #1
416; CHECK-THUMBv7-NEXT:    ldr r0, [r0, r1]
417; CHECK-THUMBv7-NEXT:    rev r0, r0
418; CHECK-THUMBv7-NEXT:    bx lr
419
420
421  %tmp = bitcast i32* %arg to i8*
422  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
423  %tmp2 = load i8, i8* %tmp1, align 4
424  %tmp3 = zext i8 %tmp2 to i32
425  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
426  %tmp5 = load i8, i8* %tmp4, align 1
427  %tmp6 = zext i8 %tmp5 to i32
428  %tmp7 = shl nuw nsw i32 %tmp6, 8
429  %tmp8 = or i32 %tmp7, %tmp3
430  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
431  %tmp10 = load i8, i8* %tmp9, align 1
432  %tmp11 = zext i8 %tmp10 to i32
433  %tmp12 = shl nuw nsw i32 %tmp11, 16
434  %tmp13 = or i32 %tmp8, %tmp12
435  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
436  %tmp15 = load i8, i8* %tmp14, align 1
437  %tmp16 = zext i8 %tmp15 to i32
438  %tmp17 = shl nuw nsw i32 %tmp16, 24
439  %tmp18 = or i32 %tmp13, %tmp17
440  ret i32 %tmp18
441}
442
443; i8* p; // p[-4] is 4 byte aligned
444; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
445define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
446; CHECK-LABEL: load_i32_by_i8_neg_offset:
447; CHECK:       @ %bb.0:
448; CHECK-NEXT:    ldr r0, [r0, #-4]
449; CHECK-NEXT:    mov r1, #65280
450; CHECK-NEXT:    mov r2, #16711680
451; CHECK-NEXT:    and r1, r1, r0, lsr #8
452; CHECK-NEXT:    and r2, r2, r0, lsl #8
453; CHECK-NEXT:    orr r1, r1, r0, lsr #24
454; CHECK-NEXT:    orr r0, r2, r0, lsl #24
455; CHECK-NEXT:    orr r0, r0, r1
456; CHECK-NEXT:    mov pc, lr
457;
458; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
459; CHECK-ARMv6:       @ %bb.0:
460; CHECK-ARMv6-NEXT:    ldr r0, [r0, #-4]
461; CHECK-ARMv6-NEXT:    rev r0, r0
462; CHECK-ARMv6-NEXT:    bx lr
463;
464; CHECK-THUMBv6-LABEL: load_i32_by_i8_neg_offset:
465; CHECK-THUMBv6:       @ %bb.0:
466; CHECK-THUMBv6-NEXT:    subs r0, r0, #4
467; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
468; CHECK-THUMBv6-NEXT:    rev r0, r0
469; CHECK-THUMBv6-NEXT:    bx lr
470;
471; CHECK-THUMBv7-LABEL: load_i32_by_i8_neg_offset:
472; CHECK-THUMBv7:       @ %bb.0:
473; CHECK-THUMBv7-NEXT:    subs r0, r0, #4
474; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
475; CHECK-THUMBv7-NEXT:    rev r0, r0
476; CHECK-THUMBv7-NEXT:    bx lr
477
478
479  %tmp = bitcast i32* %arg to i8*
480  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
481  %tmp2 = load i8, i8* %tmp1, align 4
482  %tmp3 = zext i8 %tmp2 to i32
483  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
484  %tmp5 = load i8, i8* %tmp4, align 1
485  %tmp6 = zext i8 %tmp5 to i32
486  %tmp7 = shl nuw nsw i32 %tmp6, 8
487  %tmp8 = or i32 %tmp7, %tmp3
488  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
489  %tmp10 = load i8, i8* %tmp9, align 1
490  %tmp11 = zext i8 %tmp10 to i32
491  %tmp12 = shl nuw nsw i32 %tmp11, 16
492  %tmp13 = or i32 %tmp8, %tmp12
493  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
494  %tmp15 = load i8, i8* %tmp14, align 1
495  %tmp16 = zext i8 %tmp15 to i32
496  %tmp17 = shl nuw nsw i32 %tmp16, 24
497  %tmp18 = or i32 %tmp13, %tmp17
498  ret i32 %tmp18
499}
500
501; i8* p; // p[1] is 4 byte aligned
502; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
503define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
504; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
505; CHECK:       @ %bb.0:
506; CHECK-NEXT:    ldr r0, [r0, #1]
507; CHECK-NEXT:    mov pc, lr
508;
509; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
510; CHECK-ARMv6:       @ %bb.0:
511; CHECK-ARMv6-NEXT:    ldr r0, [r0, #1]
512; CHECK-ARMv6-NEXT:    bx lr
513;
514; CHECK-THUMBv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
515; CHECK-THUMBv6:       @ %bb.0:
516; CHECK-THUMBv6-NEXT:    movs r1, #1
517; CHECK-THUMBv6-NEXT:    ldr r0, [r0, r1]
518; CHECK-THUMBv6-NEXT:    bx lr
519;
520; CHECK-THUMBv7-LABEL: load_i32_by_i8_nonzero_offset_bswap:
521; CHECK-THUMBv7:       @ %bb.0:
522; CHECK-THUMBv7-NEXT:    movs r1, #1
523; CHECK-THUMBv7-NEXT:    ldr r0, [r0, r1]
524; CHECK-THUMBv7-NEXT:    bx lr
525
526
527  %tmp = bitcast i32* %arg to i8*
528  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
529  %tmp2 = load i8, i8* %tmp1, align 1
530  %tmp3 = zext i8 %tmp2 to i32
531  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
532  %tmp5 = load i8, i8* %tmp4, align 1
533  %tmp6 = zext i8 %tmp5 to i32
534  %tmp7 = shl nuw nsw i32 %tmp6, 8
535  %tmp8 = or i32 %tmp7, %tmp3
536  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
537  %tmp10 = load i8, i8* %tmp9, align 1
538  %tmp11 = zext i8 %tmp10 to i32
539  %tmp12 = shl nuw nsw i32 %tmp11, 16
540  %tmp13 = or i32 %tmp8, %tmp12
541  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
542  %tmp15 = load i8, i8* %tmp14, align 4
543  %tmp16 = zext i8 %tmp15 to i32
544  %tmp17 = shl nuw nsw i32 %tmp16, 24
545  %tmp18 = or i32 %tmp13, %tmp17
546  ret i32 %tmp18
547}
548
549; i8* p; // p[-4] is 4 byte aligned
550; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
551define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
552; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
553; CHECK:       @ %bb.0:
554; CHECK-NEXT:    ldr r0, [r0, #-4]
555; CHECK-NEXT:    mov pc, lr
556;
557; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
558; CHECK-ARMv6:       @ %bb.0:
559; CHECK-ARMv6-NEXT:    ldr r0, [r0, #-4]
560; CHECK-ARMv6-NEXT:    bx lr
561;
562; CHECK-THUMBv6-LABEL: load_i32_by_i8_neg_offset_bswap:
563; CHECK-THUMBv6:       @ %bb.0:
564; CHECK-THUMBv6-NEXT:    subs r0, r0, #4
565; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
566; CHECK-THUMBv6-NEXT:    bx lr
567;
568; CHECK-THUMBv7-LABEL: load_i32_by_i8_neg_offset_bswap:
569; CHECK-THUMBv7:       @ %bb.0:
570; CHECK-THUMBv7-NEXT:    subs r0, r0, #4
571; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
572; CHECK-THUMBv7-NEXT:    bx lr
573
574
575  %tmp = bitcast i32* %arg to i8*
576  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
577  %tmp2 = load i8, i8* %tmp1, align 1
578  %tmp3 = zext i8 %tmp2 to i32
579  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
580  %tmp5 = load i8, i8* %tmp4, align 1
581  %tmp6 = zext i8 %tmp5 to i32
582  %tmp7 = shl nuw nsw i32 %tmp6, 8
583  %tmp8 = or i32 %tmp7, %tmp3
584  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
585  %tmp10 = load i8, i8* %tmp9, align 1
586  %tmp11 = zext i8 %tmp10 to i32
587  %tmp12 = shl nuw nsw i32 %tmp11, 16
588  %tmp13 = or i32 %tmp8, %tmp12
589  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
590  %tmp15 = load i8, i8* %tmp14, align 4
591  %tmp16 = zext i8 %tmp15 to i32
592  %tmp17 = shl nuw nsw i32 %tmp16, 24
593  %tmp18 = or i32 %tmp13, %tmp17
594  ret i32 %tmp18
595}
596
597declare i16 @llvm.bswap.i16(i16)
598
599; i16* p; // p is 4 byte aligned
600; (i32) bswap(p[0]) | (i32) bswap(p[1] << 16)
601define i32 @load_i32_by_bswap_i16(i32* %arg) {
602; CHECK-LABEL: load_i32_by_bswap_i16:
603; CHECK:       @ %bb.0:
604; CHECK-NEXT:    ldr r0, [r0]
605; CHECK-NEXT:    mov r1, #65280
606; CHECK-NEXT:    mov r2, #16711680
607; CHECK-NEXT:    and r1, r1, r0, lsr #8
608; CHECK-NEXT:    and r2, r2, r0, lsl #8
609; CHECK-NEXT:    orr r1, r1, r0, lsr #24
610; CHECK-NEXT:    orr r0, r2, r0, lsl #24
611; CHECK-NEXT:    orr r0, r0, r1
612; CHECK-NEXT:    mov pc, lr
613;
614; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
615; CHECK-ARMv6:       @ %bb.0:
616; CHECK-ARMv6-NEXT:    ldr r0, [r0]
617; CHECK-ARMv6-NEXT:    rev r0, r0
618; CHECK-ARMv6-NEXT:    bx lr
619;
620; CHECK-THUMBv6-LABEL: load_i32_by_bswap_i16:
621; CHECK-THUMBv6:       @ %bb.0:
622; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
623; CHECK-THUMBv6-NEXT:    rev r0, r0
624; CHECK-THUMBv6-NEXT:    bx lr
625;
626; CHECK-THUMBv7-LABEL: load_i32_by_bswap_i16:
627; CHECK-THUMBv7:       @ %bb.0:
628; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
629; CHECK-THUMBv7-NEXT:    rev r0, r0
630; CHECK-THUMBv7-NEXT:    bx lr
631
632
633  %tmp = bitcast i32* %arg to i16*
634  %tmp1 = load i16, i16* %tmp, align 4
635  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
636  %tmp2 = zext i16 %tmp11 to i32
637  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
638  %tmp4 = load i16, i16* %tmp3, align 1
639  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
640  %tmp5 = zext i16 %tmp41 to i32
641  %tmp6 = shl nuw nsw i32 %tmp5, 16
642  %tmp7 = or i32 %tmp6, %tmp2
643  ret i32 %tmp7
644}
645
646; i16* p; // p is 4 byte aligned
647; (i32) p[1] | (sext(p[0] << 16) to i32)
648define i32 @load_i32_by_sext_i16(i32* %arg) {
649; CHECK-LABEL: load_i32_by_sext_i16:
650; CHECK:       @ %bb.0:
651; CHECK-NEXT:    ldr r0, [r0]
652; CHECK-NEXT:    mov pc, lr
653;
654; CHECK-ARMv6-LABEL: load_i32_by_sext_i16:
655; CHECK-ARMv6:       @ %bb.0:
656; CHECK-ARMv6-NEXT:    ldr r0, [r0]
657; CHECK-ARMv6-NEXT:    bx lr
658;
659; CHECK-THUMBv6-LABEL: load_i32_by_sext_i16:
660; CHECK-THUMBv6:       @ %bb.0:
661; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
662; CHECK-THUMBv6-NEXT:    bx lr
663;
664; CHECK-THUMBv7-LABEL: load_i32_by_sext_i16:
665; CHECK-THUMBv7:       @ %bb.0:
666; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
667; CHECK-THUMBv7-NEXT:    bx lr
668  %tmp = bitcast i32* %arg to i16*
669  %tmp1 = load i16, i16* %tmp, align 4
670  %tmp2 = sext i16 %tmp1 to i32
671  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
672  %tmp4 = load i16, i16* %tmp3, align 1
673  %tmp5 = zext i16 %tmp4 to i32
674  %tmp6 = shl nuw nsw i32 %tmp2, 16
675  %tmp7 = or i32 %tmp6, %tmp5
676  ret i32 %tmp7
677}
678
679; i8* arg; i32 i;
680; p = arg + 12;
681; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
682define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) {
683; CHECK-LABEL: load_i32_by_i8_base_offset_index:
684; CHECK:       @ %bb.0:
685; CHECK-NEXT:    add r0, r0, r1
686; CHECK-NEXT:    mov r1, #65280
687; CHECK-NEXT:    mov r2, #16711680
688; CHECK-NEXT:    ldr r0, [r0, #12]
689; CHECK-NEXT:    and r1, r1, r0, lsr #8
690; CHECK-NEXT:    and r2, r2, r0, lsl #8
691; CHECK-NEXT:    orr r1, r1, r0, lsr #24
692; CHECK-NEXT:    orr r0, r2, r0, lsl #24
693; CHECK-NEXT:    orr r0, r0, r1
694; CHECK-NEXT:    mov pc, lr
695;
696; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index:
697; CHECK-ARMv6:       @ %bb.0:
698; CHECK-ARMv6-NEXT:    add r0, r0, r1
699; CHECK-ARMv6-NEXT:    ldr r0, [r0, #12]
700; CHECK-ARMv6-NEXT:    rev r0, r0
701; CHECK-ARMv6-NEXT:    bx lr
702;
703; CHECK-THUMBv6-LABEL: load_i32_by_i8_base_offset_index:
704; CHECK-THUMBv6:       @ %bb.0:
705; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
706; CHECK-THUMBv6-NEXT:    ldr r0, [r0, #12]
707; CHECK-THUMBv6-NEXT:    rev r0, r0
708; CHECK-THUMBv6-NEXT:    bx lr
709;
710; CHECK-THUMBv7-LABEL: load_i32_by_i8_base_offset_index:
711; CHECK-THUMBv7:       @ %bb.0:
712; CHECK-THUMBv7-NEXT:    adds r0, r0, r1
713; CHECK-THUMBv7-NEXT:    ldr r0, [r0, #12]
714; CHECK-THUMBv7-NEXT:    rev r0, r0
715; CHECK-THUMBv7-NEXT:    bx lr
716  %tmp = add nuw nsw i32 %i, 3
717  %tmp2 = add nuw nsw i32 %i, 2
718  %tmp3 = add nuw nsw i32 %i, 1
719  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
720  %tmp5 = zext i32 %i to i64
721  %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5
722  %tmp7 = load i8, i8* %tmp6, align 4
723  %tmp8 = zext i8 %tmp7 to i32
724  %tmp9 = zext i32 %tmp3 to i64
725  %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
726  %tmp11 = load i8, i8* %tmp10, align 1
727  %tmp12 = zext i8 %tmp11 to i32
728  %tmp13 = shl nuw nsw i32 %tmp12, 8
729  %tmp14 = or i32 %tmp13, %tmp8
730  %tmp15 = zext i32 %tmp2 to i64
731  %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15
732  %tmp17 = load i8, i8* %tmp16, align 1
733  %tmp18 = zext i8 %tmp17 to i32
734  %tmp19 = shl nuw nsw i32 %tmp18, 16
735  %tmp20 = or i32 %tmp14, %tmp19
736  %tmp21 = zext i32 %tmp to i64
737  %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21
738  %tmp23 = load i8, i8* %tmp22, align 1
739  %tmp24 = zext i8 %tmp23 to i32
740  %tmp25 = shl nuw i32 %tmp24, 24
741  %tmp26 = or i32 %tmp20, %tmp25
742  ret i32 %tmp26
743}
744
745; i8* arg; i32 i;
746; p = arg + 12;
747; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
748define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
749; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
750; CHECK:       @ %bb.0:
751; CHECK-NEXT:    add r0, r1, r0
752; CHECK-NEXT:    mov r1, #65280
753; CHECK-NEXT:    mov r2, #16711680
754; CHECK-NEXT:    ldr r0, [r0, #13]
755; CHECK-NEXT:    and r1, r1, r0, lsr #8
756; CHECK-NEXT:    and r2, r2, r0, lsl #8
757; CHECK-NEXT:    orr r1, r1, r0, lsr #24
758; CHECK-NEXT:    orr r0, r2, r0, lsl #24
759; CHECK-NEXT:    orr r0, r0, r1
760; CHECK-NEXT:    mov pc, lr
761;
762; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:
763; CHECK-ARMv6:       @ %bb.0:
764; CHECK-ARMv6-NEXT:    add r0, r1, r0
765; CHECK-ARMv6-NEXT:    ldr r0, [r0, #13]
766; CHECK-ARMv6-NEXT:    rev r0, r0
767; CHECK-ARMv6-NEXT:    bx lr
768;
769; CHECK-THUMBv6-LABEL: load_i32_by_i8_base_offset_index_2:
770; CHECK-THUMBv6:       @ %bb.0:
771; CHECK-THUMBv6-NEXT:    adds r0, r1, r0
772; CHECK-THUMBv6-NEXT:    movs r1, #13
773; CHECK-THUMBv6-NEXT:    ldr r0, [r0, r1]
774; CHECK-THUMBv6-NEXT:    rev r0, r0
775; CHECK-THUMBv6-NEXT:    bx lr
776;
777; CHECK-THUMBv7-LABEL: load_i32_by_i8_base_offset_index_2:
778; CHECK-THUMBv7:       @ %bb.0:
779; CHECK-THUMBv7-NEXT:    adds r0, r1, r0
780; CHECK-THUMBv7-NEXT:    movs r1, #13
781; CHECK-THUMBv7-NEXT:    ldr r0, [r0, r1]
782; CHECK-THUMBv7-NEXT:    rev r0, r0
783; CHECK-THUMBv7-NEXT:    bx lr
784
785  %tmp = add nuw nsw i32 %i, 4
786  %tmp2 = add nuw nsw i32 %i, 3
787  %tmp3 = add nuw nsw i32 %i, 2
788  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
789  %tmp5 = add nuw nsw i32 %i, 1
790  %tmp27 = zext i32 %tmp5 to i64
791  %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27
792  %tmp29 = load i8, i8* %tmp28, align 4
793  %tmp30 = zext i8 %tmp29 to i32
794  %tmp31 = zext i32 %tmp3 to i64
795  %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31
796  %tmp33 = load i8, i8* %tmp32, align 1
797  %tmp34 = zext i8 %tmp33 to i32
798  %tmp35 = shl nuw nsw i32 %tmp34, 8
799  %tmp36 = or i32 %tmp35, %tmp30
800  %tmp37 = zext i32 %tmp2 to i64
801  %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37
802  %tmp39 = load i8, i8* %tmp38, align 1
803  %tmp40 = zext i8 %tmp39 to i32
804  %tmp41 = shl nuw nsw i32 %tmp40, 16
805  %tmp42 = or i32 %tmp36, %tmp41
806  %tmp43 = zext i32 %tmp to i64
807  %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43
808  %tmp45 = load i8, i8* %tmp44, align 1
809  %tmp46 = zext i8 %tmp45 to i32
810  %tmp47 = shl nuw i32 %tmp46, 24
811  %tmp48 = or i32 %tmp42, %tmp47
812  ret i32 %tmp48
813}
814
815; i8* p; // p is 2 byte aligned
816; (i32) p[0] | ((i32) p[1] << 8)
817define i32 @zext_load_i32_by_i8(i32* %arg) {
818; CHECK-LABEL: zext_load_i32_by_i8:
819; CHECK:       @ %bb.0:
820; CHECK-NEXT:    ldrb r1, [r0]
821; CHECK-NEXT:    ldrb r0, [r0, #1]
822; CHECK-NEXT:    orr r0, r1, r0, lsl #8
823; CHECK-NEXT:    mov pc, lr
824;
825; CHECK-ARMv6-LABEL: zext_load_i32_by_i8:
826; CHECK-ARMv6:       @ %bb.0:
827; CHECK-ARMv6-NEXT:    ldrh r0, [r0]
828; CHECK-ARMv6-NEXT:    lsl r0, r0, #16
829; CHECK-ARMv6-NEXT:    rev r0, r0
830; CHECK-ARMv6-NEXT:    bx lr
831;
832; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8:
833; CHECK-THUMBv6:       @ %bb.0:
834; CHECK-THUMBv6-NEXT:    ldrh r0, [r0]
835; CHECK-THUMBv6-NEXT:    lsls r0, r0, #16
836; CHECK-THUMBv6-NEXT:    rev r0, r0
837; CHECK-THUMBv6-NEXT:    bx lr
838;
839; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8:
840; CHECK-THUMBv7:       @ %bb.0:
841; CHECK-THUMBv7-NEXT:    ldrh r0, [r0]
842; CHECK-THUMBv7-NEXT:    lsls r0, r0, #16
843; CHECK-THUMBv7-NEXT:    rev r0, r0
844; CHECK-THUMBv7-NEXT:    bx lr
845
846  %tmp = bitcast i32* %arg to i8*
847  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
848  %tmp2 = load i8, i8* %tmp1, align 2
849  %tmp3 = zext i8 %tmp2 to i32
850  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
851  %tmp5 = load i8, i8* %tmp4, align 1
852  %tmp6 = zext i8 %tmp5 to i32
853  %tmp7 = shl nuw nsw i32 %tmp6, 8
854  %tmp8 = or i32 %tmp7, %tmp3
855  ret i32 %tmp8
856}
857
858; i8* p; // p is 2 byte aligned
859; ((i32) p[0] << 8) | ((i32) p[1] << 16)
860define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) {
861; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
862; CHECK:       @ %bb.0:
863; CHECK-NEXT:    ldrb r1, [r0]
864; CHECK-NEXT:    ldrb r0, [r0, #1]
865; CHECK-NEXT:    lsl r0, r0, #16
866; CHECK-NEXT:    orr r0, r0, r1, lsl #8
867; CHECK-NEXT:    mov pc, lr
868;
869; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_8:
870; CHECK-ARMv6:       @ %bb.0:
871; CHECK-ARMv6-NEXT:    ldrb r1, [r0]
872; CHECK-ARMv6-NEXT:    ldrb r0, [r0, #1]
873; CHECK-ARMv6-NEXT:    lsl r0, r0, #16
874; CHECK-ARMv6-NEXT:    orr r0, r0, r1, lsl #8
875; CHECK-ARMv6-NEXT:    bx lr
876;
877; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_shl_8:
878; CHECK-THUMBv6:       @ %bb.0:
879; CHECK-THUMBv6-NEXT:    ldrb r1, [r0]
880; CHECK-THUMBv6-NEXT:    lsls r1, r1, #8
881; CHECK-THUMBv6-NEXT:    ldrb r0, [r0, #1]
882; CHECK-THUMBv6-NEXT:    lsls r0, r0, #16
883; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
884; CHECK-THUMBv6-NEXT:    bx lr
885;
886; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_shl_8:
887; CHECK-THUMBv7:       @ %bb.0:
888; CHECK-THUMBv7-NEXT:    ldrb r1, [r0]
889; CHECK-THUMBv7-NEXT:    lsls r1, r1, #8
890; CHECK-THUMBv7-NEXT:    ldrb r0, [r0, #1]
891; CHECK-THUMBv7-NEXT:    lsls r0, r0, #16
892; CHECK-THUMBv7-NEXT:    adds r0, r0, r1
893; CHECK-THUMBv7-NEXT:    bx lr
894
895  %tmp = bitcast i32* %arg to i8*
896  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
897  %tmp2 = load i8, i8* %tmp1, align 2
898  %tmp3 = zext i8 %tmp2 to i32
899  %tmp30 = shl nuw nsw i32 %tmp3, 8
900  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
901  %tmp5 = load i8, i8* %tmp4, align 1
902  %tmp6 = zext i8 %tmp5 to i32
903  %tmp7 = shl nuw nsw i32 %tmp6, 16
904  %tmp8 = or i32 %tmp7, %tmp30
905  ret i32 %tmp8
906}
907
908; i8* p; // p is 2 byte aligned
909; ((i32) p[0] << 16) | ((i32) p[1] << 24)
910define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
911; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
912; CHECK:       @ %bb.0:
913; CHECK-NEXT:    ldrb r1, [r0]
914; CHECK-NEXT:    ldrb r0, [r0, #1]
915; CHECK-NEXT:    lsl r0, r0, #24
916; CHECK-NEXT:    orr r0, r0, r1, lsl #16
917; CHECK-NEXT:    mov pc, lr
918;
919; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_16:
920; CHECK-ARMv6:       @ %bb.0:
921; CHECK-ARMv6-NEXT:    ldrb r1, [r0]
922; CHECK-ARMv6-NEXT:    ldrb r0, [r0, #1]
923; CHECK-ARMv6-NEXT:    lsl r0, r0, #24
924; CHECK-ARMv6-NEXT:    orr r0, r0, r1, lsl #16
925; CHECK-ARMv6-NEXT:    bx lr
926;
927; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_shl_16:
928; CHECK-THUMBv6:       @ %bb.0:
929; CHECK-THUMBv6-NEXT:    ldrb r1, [r0]
930; CHECK-THUMBv6-NEXT:    lsls r1, r1, #16
931; CHECK-THUMBv6-NEXT:    ldrb r0, [r0, #1]
932; CHECK-THUMBv6-NEXT:    lsls r0, r0, #24
933; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
934; CHECK-THUMBv6-NEXT:    bx lr
935;
936; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_shl_16:
937; CHECK-THUMBv7:       @ %bb.0:
938; CHECK-THUMBv7-NEXT:    ldrb r1, [r0]
939; CHECK-THUMBv7-NEXT:    lsls r1, r1, #16
940; CHECK-THUMBv7-NEXT:    ldrb r0, [r0, #1]
941; CHECK-THUMBv7-NEXT:    lsls r0, r0, #24
942; CHECK-THUMBv7-NEXT:    adds r0, r0, r1
943; CHECK-THUMBv7-NEXT:    bx lr
944
945  %tmp = bitcast i32* %arg to i8*
946  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
947  %tmp2 = load i8, i8* %tmp1, align 2
948  %tmp3 = zext i8 %tmp2 to i32
949  %tmp30 = shl nuw nsw i32 %tmp3, 16
950  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
951  %tmp5 = load i8, i8* %tmp4, align 1
952  %tmp6 = zext i8 %tmp5 to i32
953  %tmp7 = shl nuw nsw i32 %tmp6, 24
954  %tmp8 = or i32 %tmp7, %tmp30
955  ret i32 %tmp8
956}
957
958; i8* p; // p is 2 byte aligned
959; (i32) p[1] | ((i32) p[0] << 8)
960define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
961; CHECK-LABEL: zext_load_i32_by_i8_bswap:
962; CHECK:       @ %bb.0:
963; CHECK-NEXT:    ldrh r0, [r0]
964; CHECK-NEXT:    mov pc, lr
965;
966; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap:
967; CHECK-ARMv6:       @ %bb.0:
968; CHECK-ARMv6-NEXT:    ldrh r0, [r0]
969; CHECK-ARMv6-NEXT:    bx lr
970;
971; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap:
972; CHECK-THUMBv6:       @ %bb.0:
973; CHECK-THUMBv6-NEXT:    ldrh r0, [r0]
974; CHECK-THUMBv6-NEXT:    bx lr
975;
976; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap:
977; CHECK-THUMBv7:       @ %bb.0:
978; CHECK-THUMBv7-NEXT:    ldrh r0, [r0]
979; CHECK-THUMBv7-NEXT:    bx lr
980
981  %tmp = bitcast i32* %arg to i8*
982  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
983  %tmp2 = load i8, i8* %tmp1, align 1
984  %tmp3 = zext i8 %tmp2 to i32
985  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
986  %tmp5 = load i8, i8* %tmp4, align 2
987  %tmp6 = zext i8 %tmp5 to i32
988  %tmp7 = shl nuw nsw i32 %tmp6, 8
989  %tmp8 = or i32 %tmp7, %tmp3
990  ret i32 %tmp8
991}
992
993; i8* p; // p is 2 byte aligned
994; ((i32) p[1] << 8) | ((i32) p[0] << 16)
995define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) {
996; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
997; CHECK:       @ %bb.0:
998; CHECK-NEXT:    ldrb r1, [r0]
999; CHECK-NEXT:    ldrb r0, [r0, #1]
1000; CHECK-NEXT:    lsl r1, r1, #16
1001; CHECK-NEXT:    orr r0, r1, r0, lsl #8
1002; CHECK-NEXT:    mov pc, lr
1003;
1004; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_8:
1005; CHECK-ARMv6:       @ %bb.0:
1006; CHECK-ARMv6-NEXT:    ldrb r1, [r0]
1007; CHECK-ARMv6-NEXT:    ldrb r0, [r0, #1]
1008; CHECK-ARMv6-NEXT:    lsl r1, r1, #16
1009; CHECK-ARMv6-NEXT:    orr r0, r1, r0, lsl #8
1010; CHECK-ARMv6-NEXT:    bx lr
1011;
1012; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap_shl_8:
1013; CHECK-THUMBv6:       @ %bb.0:
1014; CHECK-THUMBv6-NEXT:    ldrb r1, [r0, #1]
1015; CHECK-THUMBv6-NEXT:    lsls r1, r1, #8
1016; CHECK-THUMBv6-NEXT:    ldrb r0, [r0]
1017; CHECK-THUMBv6-NEXT:    lsls r0, r0, #16
1018; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
1019; CHECK-THUMBv6-NEXT:    bx lr
1020;
1021; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap_shl_8:
1022; CHECK-THUMBv7:       @ %bb.0:
1023; CHECK-THUMBv7-NEXT:    ldrb r1, [r0, #1]
1024; CHECK-THUMBv7-NEXT:    lsls r1, r1, #8
1025; CHECK-THUMBv7-NEXT:    ldrb r0, [r0]
1026; CHECK-THUMBv7-NEXT:    lsls r0, r0, #16
1027; CHECK-THUMBv7-NEXT:    adds r0, r0, r1
1028; CHECK-THUMBv7-NEXT:    bx lr
1029
1030  %tmp = bitcast i32* %arg to i8*
1031  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
1032  %tmp2 = load i8, i8* %tmp1, align 1
1033  %tmp3 = zext i8 %tmp2 to i32
1034  %tmp30 = shl nuw nsw i32 %tmp3, 8
1035  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
1036  %tmp5 = load i8, i8* %tmp4, align 2
1037  %tmp6 = zext i8 %tmp5 to i32
1038  %tmp7 = shl nuw nsw i32 %tmp6, 16
1039  %tmp8 = or i32 %tmp7, %tmp30
1040  ret i32 %tmp8
1041}
1042
1043; i8* p; // p is 2 byte aligned
1044; ((i32) p[1] << 16) | ((i32) p[0] << 24)
1045define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
1046; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
1047; CHECK:       @ %bb.0:
1048; CHECK-NEXT:    ldrb r1, [r0]
1049; CHECK-NEXT:    ldrb r0, [r0, #1]
1050; CHECK-NEXT:    lsl r1, r1, #24
1051; CHECK-NEXT:    orr r0, r1, r0, lsl #16
1052; CHECK-NEXT:    mov pc, lr
1053;
1054; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_16:
1055; CHECK-ARMv6:       @ %bb.0:
1056; CHECK-ARMv6-NEXT:    ldrb r1, [r0]
1057; CHECK-ARMv6-NEXT:    ldrb r0, [r0, #1]
1058; CHECK-ARMv6-NEXT:    lsl r1, r1, #24
1059; CHECK-ARMv6-NEXT:    orr r0, r1, r0, lsl #16
1060; CHECK-ARMv6-NEXT:    bx lr
1061;
1062; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap_shl_16:
1063; CHECK-THUMBv6:       @ %bb.0:
1064; CHECK-THUMBv6-NEXT:    ldrb r1, [r0, #1]
1065; CHECK-THUMBv6-NEXT:    lsls r1, r1, #16
1066; CHECK-THUMBv6-NEXT:    ldrb r0, [r0]
1067; CHECK-THUMBv6-NEXT:    lsls r0, r0, #24
1068; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
1069; CHECK-THUMBv6-NEXT:    bx lr
1070;
1071; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap_shl_16:
1072; CHECK-THUMBv7:       @ %bb.0:
1073; CHECK-THUMBv7-NEXT:    ldrb r1, [r0, #1]
1074; CHECK-THUMBv7-NEXT:    lsls r1, r1, #16
1075; CHECK-THUMBv7-NEXT:    ldrb r0, [r0]
1076; CHECK-THUMBv7-NEXT:    lsls r0, r0, #24
1077; CHECK-THUMBv7-NEXT:    adds r0, r0, r1
1078; CHECK-THUMBv7-NEXT:    bx lr
1079
1080  %tmp = bitcast i32* %arg to i8*
1081  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
1082  %tmp2 = load i8, i8* %tmp1, align 1
1083  %tmp3 = zext i8 %tmp2 to i32
1084  %tmp30 = shl nuw nsw i32 %tmp3, 16
1085  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
1086  %tmp5 = load i8, i8* %tmp4, align 2
1087  %tmp6 = zext i8 %tmp5 to i32
1088  %tmp7 = shl nuw nsw i32 %tmp6, 24
1089  %tmp8 = or i32 %tmp7, %tmp30
1090  ret i32 %tmp8
1091}
1092
1093; i8* p;
1094; i16* p1.i16 = (i16*) p;
1095; (p1.i16[0] << 8) | ((i16) p[2])
1096;
1097; This is essentialy a i16 load from p[1], but we don't fold the pattern now
1098; because in the original DAG we don't have p[1] address available
1099define i16 @load_i16_from_nonzero_offset(i8* %p) {
1100; CHECK-LABEL: load_i16_from_nonzero_offset:
1101; CHECK:       @ %bb.0:
1102; CHECK-NEXT:    ldrh r1, [r0]
1103; CHECK-NEXT:    ldrb r0, [r0, #2]
1104; CHECK-NEXT:    orr r0, r0, r1, lsl #8
1105; CHECK-NEXT:    mov pc, lr
1106;
1107; CHECK-ARMv6-LABEL: load_i16_from_nonzero_offset:
1108; CHECK-ARMv6:       @ %bb.0:
1109; CHECK-ARMv6-NEXT:    ldrh r1, [r0]
1110; CHECK-ARMv6-NEXT:    ldrb r0, [r0, #2]
1111; CHECK-ARMv6-NEXT:    orr r0, r0, r1, lsl #8
1112; CHECK-ARMv6-NEXT:    bx lr
1113;
1114; CHECK-THUMBv6-LABEL: load_i16_from_nonzero_offset:
1115; CHECK-THUMBv6:       @ %bb.0:
1116; CHECK-THUMBv6-NEXT:    ldrb r1, [r0, #2]
1117; CHECK-THUMBv6-NEXT:    ldrh r0, [r0]
1118; CHECK-THUMBv6-NEXT:    lsls r0, r0, #8
1119; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
1120; CHECK-THUMBv6-NEXT:    bx lr
1121;
1122; CHECK-THUMBv7-LABEL: load_i16_from_nonzero_offset:
1123; CHECK-THUMBv7:       @ %bb.0:
1124; CHECK-THUMBv7-NEXT:    ldrb r1, [r0, #2]
1125; CHECK-THUMBv7-NEXT:    ldrh r0, [r0]
1126; CHECK-THUMBv7-NEXT:    lsls r0, r0, #8
1127; CHECK-THUMBv7-NEXT:    adds r0, r0, r1
1128; CHECK-THUMBv7-NEXT:    bx lr
1129
1130  %p1.i16 = bitcast i8* %p to i16*
1131  %p2.i8 = getelementptr i8, i8* %p, i64 2
1132  %v1 = load i16, i16* %p1.i16
1133  %v2.i8 = load i8, i8* %p2.i8
1134  %v2 = zext i8 %v2.i8 to i16
1135  %v1.shl = shl i16 %v1, 8
1136  %res = or i16 %v1.shl, %v2
1137  ret i16 %res
1138}
1139