1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s
4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s
5; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-NOUNALIGNED %s
6
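7; FIXME: The tahiti invocation below is disabled (spelled XUN so that it is not picked up as a run line); re-enable it once it passes.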
8; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
9
; Loads <3 x i32> through a non-inreg addrspace(4) pointer with align 1 and
; returns it from a default-calling-convention function.
; Expected code on both gfx900 and hawaii:
;   * +unaligned-access-mode: one dwordx3 load.
;   * -unaligned-access-mode: twelve ubyte loads whose results are masked with
;     0xff, shifted by 8/16/24 and OR-ed together into three dwords.
10define <3 x i32> @v_load_constant_v3i32_align1(<3 x i32> addrspace(4)* %ptr) {
11; GFX9-UNALIGNED-LABEL: v_load_constant_v3i32_align1:
12; GFX9-UNALIGNED:       ; %bb.0:
13; GFX9-UNALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14; GFX9-UNALIGNED-NEXT:    global_load_dwordx3 v[0:2], v[0:1], off
15; GFX9-UNALIGNED-NEXT:    s_waitcnt vmcnt(0)
16; GFX9-UNALIGNED-NEXT:    s_setpc_b64 s[30:31]
17;
18; GFX9-NOUNALIGNED-LABEL: v_load_constant_v3i32_align1:
19; GFX9-NOUNALIGNED:       ; %bb.0:
20; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21; GFX9-NOUNALIGNED-NEXT:    v_add_co_u32_e32 v2, vcc, 11, v0
22; GFX9-NOUNALIGNED-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
23; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v0, v[0:1], off
24; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v1, v[2:3], off offset:-10
25; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v4, v[2:3], off offset:-9
26; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v5, v[2:3], off offset:-8
27; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v6, v[2:3], off offset:-7
28; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v7, v[2:3], off offset:-6
29; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v8, v[2:3], off offset:-5
30; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v9, v[2:3], off offset:-4
31; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v10, v[2:3], off offset:-3
32; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v11, v[2:3], off offset:-2
33; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v12, v[2:3], off offset:-1
34; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v2, v[2:3], off
35; GFX9-NOUNALIGNED-NEXT:    v_mov_b32_e32 v3, 0xff
36; GFX9-NOUNALIGNED-NEXT:    s_movk_i32 s4, 0xff
37; GFX9-NOUNALIGNED-NEXT:    v_mov_b32_e32 v13, 8
38; GFX9-NOUNALIGNED-NEXT:    s_mov_b32 s5, 8
39; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(10)
40; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_sdwa v1, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
41; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(9)
42; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v4, s4, v4
43; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(8)
44; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v5, s4, v5
45; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v0, v0, s4, v1
46; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(6)
47; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_sdwa v7, v13, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
48; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(5)
49; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v8, v8, v3
50; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(4)
51; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v9, v9, v3
52; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v1, 16, v4
53; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(2)
54; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_sdwa v11, v13, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
55; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(1)
56; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v12, v12, v3
57; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0)
58; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v2, v2, v3
59; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v4, 24, v5
60; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v5, v6, v3, v7
61; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v6, 16, v8
62; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v7, 24, v9
63; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v3, v10, v3, v11
64; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v8, 16, v12
65; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
66; GFX9-NOUNALIGNED-NEXT:    v_or3_b32 v0, v0, v1, v4
67; GFX9-NOUNALIGNED-NEXT:    v_or3_b32 v1, v5, v6, v7
68; GFX9-NOUNALIGNED-NEXT:    v_or3_b32 v2, v3, v8, v2
69; GFX9-NOUNALIGNED-NEXT:    s_setpc_b64 s[30:31]
70;
71; GFX7-UNALIGNED-LABEL: v_load_constant_v3i32_align1:
72; GFX7-UNALIGNED:       ; %bb.0:
73; GFX7-UNALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
74; GFX7-UNALIGNED-NEXT:    s_mov_b32 s6, 0
75; GFX7-UNALIGNED-NEXT:    s_mov_b32 s7, 0xf000
76; GFX7-UNALIGNED-NEXT:    s_mov_b64 s[4:5], 0
77; GFX7-UNALIGNED-NEXT:    buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64
78; GFX7-UNALIGNED-NEXT:    s_waitcnt vmcnt(0)
79; GFX7-UNALIGNED-NEXT:    s_setpc_b64 s[30:31]
80;
81; GFX7-NOUNALIGNED-LABEL: v_load_constant_v3i32_align1:
82; GFX7-NOUNALIGNED:       ; %bb.0:
83; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
84; GFX7-NOUNALIGNED-NEXT:    s_mov_b32 s6, 0
85; GFX7-NOUNALIGNED-NEXT:    s_mov_b32 s7, 0xf000
86; GFX7-NOUNALIGNED-NEXT:    s_mov_b64 s[4:5], 0
87; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v2, v[0:1], s[4:7], 0 addr64
88; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v3, v[0:1], s[4:7], 0 addr64 offset:1
89; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v4, v[0:1], s[4:7], 0 addr64 offset:2
90; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v5, v[0:1], s[4:7], 0 addr64 offset:3
91; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v6, v[0:1], s[4:7], 0 addr64 offset:4
92; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v7, v[0:1], s[4:7], 0 addr64 offset:5
93; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v8, v[0:1], s[4:7], 0 addr64 offset:6
94; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v9, v[0:1], s[4:7], 0 addr64 offset:7
95; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v10, v[0:1], s[4:7], 0 addr64 offset:8
96; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v11, v[0:1], s[4:7], 0 addr64 offset:9
97; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v12, v[0:1], s[4:7], 0 addr64 offset:10
98; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:11
99; GFX7-NOUNALIGNED-NEXT:    v_mov_b32_e32 v1, 0xff
100; GFX7-NOUNALIGNED-NEXT:    s_movk_i32 s4, 0xff
101; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(11)
102; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v2, s4, v2
103; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(10)
104; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v3, s4, v3
105; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(9)
106; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v4, s4, v4
107; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(8)
108; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v5, s4, v5
109; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(7)
110; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v6, s4, v6
111; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(6)
112; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v7, v7, v1
113; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(5)
114; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v8, v8, v1
115; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(4)
116; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v9, v9, v1
117; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(3)
118; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v10, v10, v1
119; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(2)
120; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v11, v11, v1
121; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(1)
122; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v12, v12, v1
123; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0)
124; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v0, v0, v1
125; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v1, 8, v3
126; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v3, 16, v4
127; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v4, 24, v5
128; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v5, 8, v7
129; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v7, 16, v8
130; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v8, 24, v9
131; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v9, 8, v11
132; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v11, 16, v12
133; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v12, 24, v0
134; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v0, v2, v1
135; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v1, v6, v5
136; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v2, v10, v9
137; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v0, v0, v3
138; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v1, v1, v7
139; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v2, v2, v11
140; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v0, v0, v4
141; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v1, v1, v8
142; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v2, v2, v12
143; GFX7-NOUNALIGNED-NEXT:    s_setpc_b64 s[30:31]
144  %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 1
145  ret <3 x i32> %load
146}
147
; Loads <3 x i32> through a non-inreg addrspace(4) pointer with align 2.
; Expected code on both gfx900 and hawaii:
;   * +unaligned-access-mode: one dwordx3 load.
;   * -unaligned-access-mode: six ushort loads whose results are masked with
;     0xffff, with every second one shifted left by 16 and OR-ed with its
;     predecessor to form three dwords.
148define <3 x i32> @v_load_constant_v3i32_align2(<3 x i32> addrspace(4)* %ptr) {
149; GFX9-UNALIGNED-LABEL: v_load_constant_v3i32_align2:
150; GFX9-UNALIGNED:       ; %bb.0:
151; GFX9-UNALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152; GFX9-UNALIGNED-NEXT:    global_load_dwordx3 v[0:2], v[0:1], off
153; GFX9-UNALIGNED-NEXT:    s_waitcnt vmcnt(0)
154; GFX9-UNALIGNED-NEXT:    s_setpc_b64 s[30:31]
155;
156; GFX9-NOUNALIGNED-LABEL: v_load_constant_v3i32_align2:
157; GFX9-NOUNALIGNED:       ; %bb.0:
158; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
159; GFX9-NOUNALIGNED-NEXT:    v_add_co_u32_e32 v2, vcc, 10, v0
160; GFX9-NOUNALIGNED-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
161; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v0, v[0:1], off
162; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v1, v[2:3], off offset:-8
163; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v4, v[2:3], off offset:-6
164; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v5, v[2:3], off offset:-4
165; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v6, v[2:3], off offset:-2
166; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v2, v[2:3], off
167; GFX9-NOUNALIGNED-NEXT:    v_mov_b32_e32 v3, 0xffff
168; GFX9-NOUNALIGNED-NEXT:    s_mov_b32 s4, 0xffff
169; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(4)
170; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v1, s4, v1
171; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
172; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(2)
173; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v5, v5, v3
174; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
175; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0)
176; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v2, v2, v3
177; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
178; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v0, v0, s4, v1
179; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v1, v4, v3, v5
180; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v2, v6, v3, v2
181; GFX9-NOUNALIGNED-NEXT:    s_setpc_b64 s[30:31]
182;
183; GFX7-UNALIGNED-LABEL: v_load_constant_v3i32_align2:
184; GFX7-UNALIGNED:       ; %bb.0:
185; GFX7-UNALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
186; GFX7-UNALIGNED-NEXT:    s_mov_b32 s6, 0
187; GFX7-UNALIGNED-NEXT:    s_mov_b32 s7, 0xf000
188; GFX7-UNALIGNED-NEXT:    s_mov_b64 s[4:5], 0
189; GFX7-UNALIGNED-NEXT:    buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64
190; GFX7-UNALIGNED-NEXT:    s_waitcnt vmcnt(0)
191; GFX7-UNALIGNED-NEXT:    s_setpc_b64 s[30:31]
192;
193; GFX7-NOUNALIGNED-LABEL: v_load_constant_v3i32_align2:
194; GFX7-NOUNALIGNED:       ; %bb.0:
195; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196; GFX7-NOUNALIGNED-NEXT:    s_mov_b32 s6, 0
197; GFX7-NOUNALIGNED-NEXT:    s_mov_b32 s7, 0xf000
198; GFX7-NOUNALIGNED-NEXT:    s_mov_b64 s[4:5], 0
199; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64
200; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 offset:2
201; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v4, v[0:1], s[4:7], 0 addr64 offset:4
202; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v5, v[0:1], s[4:7], 0 addr64 offset:6
203; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:8
204; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:10
205; GFX7-NOUNALIGNED-NEXT:    s_mov_b32 s4, 0xffff
206; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(5)
207; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v1, s4, v2
208; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(4)
209; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v2, s4, v3
210; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(3)
211; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v3, s4, v4
212; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(2)
213; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v4, s4, v5
214; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(1)
215; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v5, s4, v6
216; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0)
217; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v0, s4, v0
218; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
219; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v6, 16, v0
220; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
221; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v0, v1, v2
222; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v1, v3, v4
223; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v2, v5, v6
224; GFX7-NOUNALIGNED-NEXT:    s_setpc_b64 s[30:31]
225  %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 2
226  ret <3 x i32> %load
227}
228
; Loads <3 x i32> through a non-inreg addrspace(4) pointer with align 4.
; The expectations are shared between both unaligned-access-mode settings:
; one dwordx3 load on gfx900 and on hawaii.
229define <3 x i32> @v_load_constant_v3i32_align4(<3 x i32> addrspace(4)* %ptr) {
230; GFX9-LABEL: v_load_constant_v3i32_align4:
231; GFX9:       ; %bb.0:
232; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
233; GFX9-NEXT:    global_load_dwordx3 v[0:2], v[0:1], off
234; GFX9-NEXT:    s_waitcnt vmcnt(0)
235; GFX9-NEXT:    s_setpc_b64 s[30:31]
236;
237; GFX7-LABEL: v_load_constant_v3i32_align4:
238; GFX7:       ; %bb.0:
239; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
240; GFX7-NEXT:    s_mov_b32 s6, 0
241; GFX7-NEXT:    s_mov_b32 s7, 0xf000
242; GFX7-NEXT:    s_mov_b64 s[4:5], 0
243; GFX7-NEXT:    buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64
244; GFX7-NEXT:    s_waitcnt vmcnt(0)
245; GFX7-NEXT:    s_setpc_b64 s[30:31]
246  %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 4
247  ret <3 x i32> %load
248}
249
; Loads a scalar i96 through a non-inreg addrspace(4) pointer with align 8.
; The expectations are shared between both unaligned-access-mode settings:
; one dwordx3 load on gfx900 and on hawaii, with the result left in v[0:2].
250define i96 @v_load_constant_i96_align8(i96 addrspace(4)* %ptr) {
251; GFX9-LABEL: v_load_constant_i96_align8:
252; GFX9:       ; %bb.0:
253; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
254; GFX9-NEXT:    global_load_dwordx3 v[0:2], v[0:1], off
255; GFX9-NEXT:    s_waitcnt vmcnt(0)
256; GFX9-NEXT:    s_setpc_b64 s[30:31]
257;
258; GFX7-LABEL: v_load_constant_i96_align8:
259; GFX7:       ; %bb.0:
260; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
261; GFX7-NEXT:    s_mov_b32 s6, 0
262; GFX7-NEXT:    s_mov_b32 s7, 0xf000
263; GFX7-NEXT:    s_mov_b64 s[4:5], 0
264; GFX7-NEXT:    buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64
265; GFX7-NEXT:    s_waitcnt vmcnt(0)
266; GFX7-NEXT:    s_setpc_b64 s[30:31]
267  %load = load i96, i96 addrspace(4)* %ptr, align 8
268  ret i96 %load
269}
270
; Loads <3 x i32> through a non-inreg addrspace(4) pointer with align 8.
; The expectations are shared between both unaligned-access-mode settings:
; one dwordx3 load on gfx900 and on hawaii.
271define <3 x i32> @v_load_constant_v3i32_align8(<3 x i32> addrspace(4)* %ptr) {
272; GFX9-LABEL: v_load_constant_v3i32_align8:
273; GFX9:       ; %bb.0:
274; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
275; GFX9-NEXT:    global_load_dwordx3 v[0:2], v[0:1], off
276; GFX9-NEXT:    s_waitcnt vmcnt(0)
277; GFX9-NEXT:    s_setpc_b64 s[30:31]
278;
279; GFX7-LABEL: v_load_constant_v3i32_align8:
280; GFX7:       ; %bb.0:
281; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
282; GFX7-NEXT:    s_mov_b32 s6, 0
283; GFX7-NEXT:    s_mov_b32 s7, 0xf000
284; GFX7-NEXT:    s_mov_b64 s[4:5], 0
285; GFX7-NEXT:    buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64
286; GFX7-NEXT:    s_waitcnt vmcnt(0)
287; GFX7-NEXT:    s_setpc_b64 s[30:31]
288  %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 8
289  ret <3 x i32> %load
290}
291
; Loads <6 x i16> through a non-inreg addrspace(4) pointer with align 8.
; The expectations are shared between both unaligned-access-mode settings:
;   * gfx900: one dwordx3 load, result left in v[0:2].
;   * hawaii: one dwordx3 load into v[6:8], then v0/v2/v4 receive copies of
;     the three dwords and v1/v3/v5 receive the same dwords shifted right by 16.
292define <6 x i16> @v_load_constant_v6i16_align8(<6 x i16> addrspace(4)* %ptr) {
293; GFX9-LABEL: v_load_constant_v6i16_align8:
294; GFX9:       ; %bb.0:
295; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
296; GFX9-NEXT:    global_load_dwordx3 v[0:2], v[0:1], off
297; GFX9-NEXT:    s_waitcnt vmcnt(0)
298; GFX9-NEXT:    s_setpc_b64 s[30:31]
299;
300; GFX7-LABEL: v_load_constant_v6i16_align8:
301; GFX7:       ; %bb.0:
302; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
303; GFX7-NEXT:    s_mov_b32 s6, 0
304; GFX7-NEXT:    s_mov_b32 s7, 0xf000
305; GFX7-NEXT:    s_mov_b64 s[4:5], 0
306; GFX7-NEXT:    buffer_load_dwordx3 v[6:8], v[0:1], s[4:7], 0 addr64
307; GFX7-NEXT:    s_waitcnt vmcnt(0)
308; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 16, v6
309; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 16, v7
310; GFX7-NEXT:    v_lshrrev_b32_e32 v5, 16, v8
311; GFX7-NEXT:    v_mov_b32_e32 v0, v6
312; GFX7-NEXT:    v_mov_b32_e32 v2, v7
313; GFX7-NEXT:    v_mov_b32_e32 v4, v8
314; GFX7-NEXT:    s_setpc_b64 s[30:31]
315  %load = load <6 x i16>, <6 x i16> addrspace(4)* %ptr, align 8
316  ret <6 x i16> %load
317}
318
; Loads <12 x i8> through a non-inreg addrspace(4) pointer with align 8.
; The expectations are shared between both unaligned-access-mode settings.
; On gfx900 and on hawaii: one dwordx3 load into v[0:2], followed by right
; shifts by 8/16/24 and register moves that leave values in v0-v11 (v12 and
; v13 are used as temporaries for the shifted copies of the first dword).
319define <12 x i8> @v_load_constant_v12i8_align8(<12 x i8> addrspace(4)* %ptr) {
320; GFX9-LABEL: v_load_constant_v12i8_align8:
321; GFX9:       ; %bb.0:
322; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
323; GFX9-NEXT:    global_load_dwordx3 v[0:2], v[0:1], off
324; GFX9-NEXT:    s_waitcnt vmcnt(0)
325; GFX9-NEXT:    v_lshrrev_b32_e32 v13, 8, v0
326; GFX9-NEXT:    v_lshrrev_b32_e32 v12, 16, v0
327; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
328; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
329; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
330; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
331; GFX9-NEXT:    v_mov_b32_e32 v4, v1
332; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
333; GFX9-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
334; GFX9-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
335; GFX9-NEXT:    v_mov_b32_e32 v8, v2
336; GFX9-NEXT:    v_mov_b32_e32 v1, v13
337; GFX9-NEXT:    v_mov_b32_e32 v2, v12
338; GFX9-NEXT:    s_setpc_b64 s[30:31]
339;
340; GFX7-LABEL: v_load_constant_v12i8_align8:
341; GFX7:       ; %bb.0:
342; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
343; GFX7-NEXT:    s_mov_b32 s6, 0
344; GFX7-NEXT:    s_mov_b32 s7, 0xf000
345; GFX7-NEXT:    s_mov_b64 s[4:5], 0
346; GFX7-NEXT:    buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64
347; GFX7-NEXT:    s_waitcnt vmcnt(0)
348; GFX7-NEXT:    v_lshrrev_b32_e32 v13, 8, v0
349; GFX7-NEXT:    v_lshrrev_b32_e32 v12, 16, v0
350; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
351; GFX7-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
352; GFX7-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
353; GFX7-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
354; GFX7-NEXT:    v_mov_b32_e32 v4, v1
355; GFX7-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
356; GFX7-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
357; GFX7-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
358; GFX7-NEXT:    v_mov_b32_e32 v8, v2
359; GFX7-NEXT:    v_mov_b32_e32 v1, v13
360; GFX7-NEXT:    v_mov_b32_e32 v2, v12
361; GFX7-NEXT:    s_setpc_b64 s[30:31]
362  %load = load <12 x i8>, <12 x i8> addrspace(4)* %ptr, align 8
363  ret <12 x i8> %load
364}
365
; Loads <3 x i32> through a non-inreg addrspace(4) pointer with align 16.
; The expectations are shared between both unaligned-access-mode settings:
; one dwordx3 load on gfx900 and on hawaii.
366define <3 x i32> @v_load_constant_v3i32_align16(<3 x i32> addrspace(4)* %ptr) {
367; GFX9-LABEL: v_load_constant_v3i32_align16:
368; GFX9:       ; %bb.0:
369; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
370; GFX9-NEXT:    global_load_dwordx3 v[0:2], v[0:1], off
371; GFX9-NEXT:    s_waitcnt vmcnt(0)
372; GFX9-NEXT:    s_setpc_b64 s[30:31]
373;
374; GFX7-LABEL: v_load_constant_v3i32_align16:
375; GFX7:       ; %bb.0:
376; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377; GFX7-NEXT:    s_mov_b32 s6, 0
378; GFX7-NEXT:    s_mov_b32 s7, 0xf000
379; GFX7-NEXT:    s_mov_b64 s[4:5], 0
380; GFX7-NEXT:    buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64
381; GFX7-NEXT:    s_waitcnt vmcnt(0)
382; GFX7-NEXT:    s_setpc_b64 s[30:31]
383  %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 16
384  ret <3 x i32> %load
385}
386
; amdgpu_ps variant: loads <3 x i32> through an inreg addrspace(4) pointer
; (passed in s[0:1]) with align 1; every expectation ends with the result in
; s0-s2 via v_readfirstlane.
; Expected code:
;   * gfx900, +unaligned-access-mode: one global dwordx3 load with an SGPR base.
;   * gfx900, -unaligned-access-mode: twelve global ubyte loads, recombined
;     with mask/shift/or.
;   * hawaii, +unaligned-access-mode: s_load_dwordx2 plus s_load_dword, copied
;     to VGPRs and read back with v_readfirstlane.
;   * hawaii, -unaligned-access-mode: twelve buffer ubyte loads, recombined
;     with mask/shift/or.
; NOTE(review): in the hawaii +unaligned-access-mode sequence v2 is written
; from s8 and then immediately overwritten from s0, and s8 is not written by
; any earlier instruction in that sequence. This looks like a redundant copy
; in the generated code - confirm when the assertions are next regenerated.
387define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(<3 x i32> addrspace(4)* inreg %ptr) {
388; GFX9-UNALIGNED-LABEL: s_load_constant_v3i32_align1:
389; GFX9-UNALIGNED:       ; %bb.0:
390; GFX9-UNALIGNED-NEXT:    v_mov_b32_e32 v0, 0
391; GFX9-UNALIGNED-NEXT:    global_load_dwordx3 v[0:2], v0, s[0:1]
392; GFX9-UNALIGNED-NEXT:    s_waitcnt vmcnt(0)
393; GFX9-UNALIGNED-NEXT:    v_readfirstlane_b32 s0, v0
394; GFX9-UNALIGNED-NEXT:    v_readfirstlane_b32 s1, v1
395; GFX9-UNALIGNED-NEXT:    v_readfirstlane_b32 s2, v2
396; GFX9-UNALIGNED-NEXT:    ; return to shader part epilog
397;
398; GFX9-NOUNALIGNED-LABEL: s_load_constant_v3i32_align1:
399; GFX9-NOUNALIGNED:       ; %bb.0:
400; GFX9-NOUNALIGNED-NEXT:    v_mov_b32_e32 v0, 0
401; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v1, v0, s[0:1]
402; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v2, v0, s[0:1] offset:1
403; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v3, v0, s[0:1] offset:2
404; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v4, v0, s[0:1] offset:3
405; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v5, v0, s[0:1] offset:4
406; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v6, v0, s[0:1] offset:5
407; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v7, v0, s[0:1] offset:6
408; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v8, v0, s[0:1] offset:7
409; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v9, v0, s[0:1] offset:8
410; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v10, v0, s[0:1] offset:9
411; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v11, v0, s[0:1] offset:10
412; GFX9-NOUNALIGNED-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:11
413; GFX9-NOUNALIGNED-NEXT:    v_mov_b32_e32 v12, 0xff
414; GFX9-NOUNALIGNED-NEXT:    s_movk_i32 s0, 0xff
415; GFX9-NOUNALIGNED-NEXT:    s_mov_b32 s1, 8
416; GFX9-NOUNALIGNED-NEXT:    v_mov_b32_e32 v13, 8
417; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(10)
418; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_sdwa v2, s1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
419; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(9)
420; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v3, s0, v3
421; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(8)
422; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v4, s0, v4
423; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v1, v1, s0, v2
424; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(6)
425; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_sdwa v6, s1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
426; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(5)
427; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v7, v7, v12
428; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(4)
429; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v8, v8, v12
430; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
431; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(2)
432; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_sdwa v10, v13, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
433; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(1)
434; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v11, v11, v12
435; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0)
436; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v0, v0, v12
437; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v3, 24, v4
438; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v4, v5, s0, v6
439; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v5, 16, v7
440; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v6, 24, v8
441; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v7, v9, v12, v10
442; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v9, 24, v0
443; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v8, 16, v11
444; GFX9-NOUNALIGNED-NEXT:    v_or3_b32 v0, v1, v2, v3
445; GFX9-NOUNALIGNED-NEXT:    v_or3_b32 v1, v4, v5, v6
446; GFX9-NOUNALIGNED-NEXT:    v_or3_b32 v2, v7, v8, v9
447; GFX9-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s0, v0
448; GFX9-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s1, v1
449; GFX9-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s2, v2
450; GFX9-NOUNALIGNED-NEXT:    ; return to shader part epilog
451;
452; GFX7-UNALIGNED-LABEL: s_load_constant_v3i32_align1:
453; GFX7-UNALIGNED:       ; %bb.0:
454; GFX7-UNALIGNED-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x0
455; GFX7-UNALIGNED-NEXT:    s_load_dword s0, s[0:1], 0x2
456; GFX7-UNALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
457; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v0, s6
458; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v2, s8
459; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v2, s0
460; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v1, s7
461; GFX7-UNALIGNED-NEXT:    v_readfirstlane_b32 s0, v0
462; GFX7-UNALIGNED-NEXT:    v_readfirstlane_b32 s1, v1
463; GFX7-UNALIGNED-NEXT:    v_readfirstlane_b32 s2, v2
464; GFX7-UNALIGNED-NEXT:    ; return to shader part epilog
465;
466; GFX7-NOUNALIGNED-LABEL: s_load_constant_v3i32_align1:
467; GFX7-NOUNALIGNED:       ; %bb.0:
468; GFX7-NOUNALIGNED-NEXT:    s_mov_b32 s2, -1
469; GFX7-NOUNALIGNED-NEXT:    s_mov_b32 s3, 0xf000
470; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
471; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v1, off, s[0:3], 0 offset:1
472; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v2, off, s[0:3], 0 offset:2
473; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v3, off, s[0:3], 0 offset:3
474; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v4, off, s[0:3], 0 offset:4
475; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v5, off, s[0:3], 0 offset:5
476; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v6, off, s[0:3], 0 offset:6
477; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v7, off, s[0:3], 0 offset:7
478; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v8, off, s[0:3], 0 offset:8
479; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v9, off, s[0:3], 0 offset:9
480; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v10, off, s[0:3], 0 offset:10
481; GFX7-NOUNALIGNED-NEXT:    buffer_load_ubyte v11, off, s[0:3], 0 offset:11
482; GFX7-NOUNALIGNED-NEXT:    v_mov_b32_e32 v12, 0xff
483; GFX7-NOUNALIGNED-NEXT:    s_movk_i32 s0, 0xff
484; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(11)
485; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v0, s0, v0
486; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(10)
487; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v1, s0, v1
488; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(9)
489; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v2, s0, v2
490; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
491; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(7)
492; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v4, s0, v4
493; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(6)
494; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v5, v5, v12
495; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(5)
496; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v6, v6, v12
497; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v5, 8, v5
498; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(3)
499; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v8, v8, v12
500; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(2)
501; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v9, v9, v12
502; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(1)
503; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v10, v10, v12
504; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v9, 8, v9
505; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v3, s0, v3
506; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v7, v7, v12
507; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0)
508; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v11, v11, v12
509; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v0, v0, v1
510; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
511; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v1, v4, v5
512; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
513; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
514; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v4, v8, v9
515; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v0, v0, v2
516; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
517; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v7, 24, v7
518; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v1, v1, v6
519; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v11, 24, v11
520; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v2, v4, v10
521; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v0, v0, v3
522; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v1, v1, v7
523; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v2, v2, v11
524; GFX7-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s0, v0
525; GFX7-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s1, v1
526; GFX7-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s2, v2
527; GFX7-NOUNALIGNED-NEXT:    ; return to shader part epilog
528  %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 1
529  ret <3 x i32> %load
530}
531
; amdgpu_ps variant: loads <3 x i32> through an inreg addrspace(4) pointer
; (passed in s[0:1]) with align 2; every expectation ends with the result in
; s0-s2 via v_readfirstlane.
; Expected code:
;   * gfx900, +unaligned-access-mode: one global dwordx3 load with an SGPR base.
;   * gfx900, -unaligned-access-mode: six global ushort loads, recombined with
;     mask/shift/or.
;   * hawaii, +unaligned-access-mode: s_load_dwordx2 plus s_load_dword, copied
;     to VGPRs and read back with v_readfirstlane.
;   * hawaii, -unaligned-access-mode: six buffer ushort loads, recombined with
;     mask/shift/or.
; NOTE(review): in the hawaii +unaligned-access-mode sequence v2 is written
; from s8 and then immediately overwritten from s0, and s8 is not written by
; any earlier instruction in that sequence. This looks like a redundant copy
; in the generated code - confirm when the assertions are next regenerated.
532define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(<3 x i32> addrspace(4)* inreg %ptr) {
533; GFX9-UNALIGNED-LABEL: s_load_constant_v3i32_align2:
534; GFX9-UNALIGNED:       ; %bb.0:
535; GFX9-UNALIGNED-NEXT:    v_mov_b32_e32 v0, 0
536; GFX9-UNALIGNED-NEXT:    global_load_dwordx3 v[0:2], v0, s[0:1]
537; GFX9-UNALIGNED-NEXT:    s_waitcnt vmcnt(0)
538; GFX9-UNALIGNED-NEXT:    v_readfirstlane_b32 s0, v0
539; GFX9-UNALIGNED-NEXT:    v_readfirstlane_b32 s1, v1
540; GFX9-UNALIGNED-NEXT:    v_readfirstlane_b32 s2, v2
541; GFX9-UNALIGNED-NEXT:    ; return to shader part epilog
542;
543; GFX9-NOUNALIGNED-LABEL: s_load_constant_v3i32_align2:
544; GFX9-NOUNALIGNED:       ; %bb.0:
545; GFX9-NOUNALIGNED-NEXT:    v_mov_b32_e32 v0, 0
546; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v1, v0, s[0:1]
547; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v2, v0, s[0:1] offset:2
548; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v3, v0, s[0:1] offset:4
549; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v4, v0, s[0:1] offset:6
550; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v5, v0, s[0:1] offset:8
551; GFX9-NOUNALIGNED-NEXT:    global_load_ushort v0, v0, s[0:1] offset:10
552; GFX9-NOUNALIGNED-NEXT:    s_mov_b32 s0, 0xffff
553; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(4)
554; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v2, s0, v2
555; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
556; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(2)
557; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v4, s0, v4
558; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
559; GFX9-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0)
560; GFX9-NOUNALIGNED-NEXT:    v_and_b32_e32 v0, s0, v0
561; GFX9-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v6, 16, v0
562; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v0, v1, s0, v2
563; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v1, v3, s0, v4
564; GFX9-NOUNALIGNED-NEXT:    v_and_or_b32 v2, v5, s0, v6
565; GFX9-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s0, v0
566; GFX9-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s1, v1
567; GFX9-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s2, v2
568; GFX9-NOUNALIGNED-NEXT:    ; return to shader part epilog
569;
570; GFX7-UNALIGNED-LABEL: s_load_constant_v3i32_align2:
571; GFX7-UNALIGNED:       ; %bb.0:
572; GFX7-UNALIGNED-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x0
573; GFX7-UNALIGNED-NEXT:    s_load_dword s0, s[0:1], 0x2
574; GFX7-UNALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
575; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v0, s6
576; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v2, s8
577; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v2, s0
578; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v1, s7
579; GFX7-UNALIGNED-NEXT:    v_readfirstlane_b32 s0, v0
580; GFX7-UNALIGNED-NEXT:    v_readfirstlane_b32 s1, v1
581; GFX7-UNALIGNED-NEXT:    v_readfirstlane_b32 s2, v2
582; GFX7-UNALIGNED-NEXT:    ; return to shader part epilog
583;
584; GFX7-NOUNALIGNED-LABEL: s_load_constant_v3i32_align2:
585; GFX7-NOUNALIGNED:       ; %bb.0:
586; GFX7-NOUNALIGNED-NEXT:    s_mov_b32 s2, -1
587; GFX7-NOUNALIGNED-NEXT:    s_mov_b32 s3, 0xf000
588; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v0, off, s[0:3], 0
589; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v1, off, s[0:3], 0 offset:2
590; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v2, off, s[0:3], 0 offset:4
591; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v3, off, s[0:3], 0 offset:6
592; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v4, off, s[0:3], 0 offset:8
593; GFX7-NOUNALIGNED-NEXT:    buffer_load_ushort v5, off, s[0:3], 0 offset:10
594; GFX7-NOUNALIGNED-NEXT:    s_mov_b32 s0, 0xffff
595; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(5)
596; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v0, s0, v0
597; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(4)
598; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v1, s0, v1
599; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
600; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(2)
601; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v3, s0, v3
602; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v2, s0, v2
603; GFX7-NOUNALIGNED-NEXT:    s_waitcnt vmcnt(0)
604; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v5, s0, v5
605; GFX7-NOUNALIGNED-NEXT:    v_and_b32_e32 v4, s0, v4
606; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
607; GFX7-NOUNALIGNED-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
608; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v0, v0, v1
609; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v1, v2, v3
610; GFX7-NOUNALIGNED-NEXT:    v_or_b32_e32 v2, v4, v5
611; GFX7-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s0, v0
612; GFX7-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s1, v1
613; GFX7-NOUNALIGNED-NEXT:    v_readfirstlane_b32 s2, v2
614; GFX7-NOUNALIGNED-NEXT:    ; return to shader part epilog
615  %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 2
616  ret <3 x i32> %load
617}
618
; Uniform load of <3 x i32> from addrspace(4) through an inreg pointer in an
; amdgpu_ps function, with the load marked align 4.
; Expected code for both the gfx900 and hawaii runs: the pointer is first
; copied from s[0:1] to s[2:3], then the 96-bit value is fetched as
; s_load_dwordx2 into s[0:1] plus s_load_dword into s2.
; The immediate of the trailing s_load_dword is 0x8 in the gfx900 checks and
; 0x2 in the hawaii checks (NOTE(review): presumably a byte offset versus a
; dword offset in the instruction encoding - confirm against the ISA docs).
619define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(<3 x i32> addrspace(4)* inreg %ptr) {
620; GFX9-LABEL: s_load_constant_v3i32_align4:
621; GFX9:       ; %bb.0:
622; GFX9-NEXT:    s_mov_b32 s2, s0
623; GFX9-NEXT:    s_mov_b32 s3, s1
624; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
625; GFX9-NEXT:    s_load_dword s2, s[2:3], 0x8
626; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
627; GFX9-NEXT:    ; return to shader part epilog
628;
629; GFX7-LABEL: s_load_constant_v3i32_align4:
630; GFX7:       ; %bb.0:
631; GFX7-NEXT:    s_mov_b32 s2, s0
632; GFX7-NEXT:    s_mov_b32 s3, s1
633; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
634; GFX7-NEXT:    s_load_dword s2, s[2:3], 0x2
635; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
636; GFX7-NEXT:    ; return to shader part epilog
637  %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 4
638  ret <3 x i32> %load
639}
640
; Same shape of test as the <3 x i32> cases, but the loaded and returned type
; is the scalar integer i96 and the load is marked align 8.
; Expected code for both the gfx900 and hawaii runs: pointer copied to
; s[2:3], then s_load_dwordx2 into s[0:1] followed by s_load_dword into s2,
; i.e. the 96-bit value is still fetched in a 64-bit and a 32-bit piece.
; Only the immediate of the second load differs between the two targets
; (0x8 versus 0x2).
641define amdgpu_ps i96 @s_load_constant_i96_align8(i96 addrspace(4)* inreg %ptr) {
642; GFX9-LABEL: s_load_constant_i96_align8:
643; GFX9:       ; %bb.0:
644; GFX9-NEXT:    s_mov_b32 s2, s0
645; GFX9-NEXT:    s_mov_b32 s3, s1
646; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
647; GFX9-NEXT:    s_load_dword s2, s[2:3], 0x8
648; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
649; GFX9-NEXT:    ; return to shader part epilog
650;
651; GFX7-LABEL: s_load_constant_i96_align8:
652; GFX7:       ; %bb.0:
653; GFX7-NEXT:    s_mov_b32 s2, s0
654; GFX7-NEXT:    s_mov_b32 s3, s1
655; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
656; GFX7-NEXT:    s_load_dword s2, s[2:3], 0x2
657; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
658; GFX7-NEXT:    ; return to shader part epilog
659  %load = load i96, i96 addrspace(4)* %ptr, align 8
660  ret i96 %load
661}
662
; Uniform load of <3 x i32> from addrspace(4) through an inreg pointer, with
; the load marked align 8.
; The expected instruction sequence is unchanged from what a 4-byte aligned
; load of the same type would need: pointer copied to s[2:3], then
; s_load_dwordx2 into s[0:1] and s_load_dword into s2. The checks therefore
; record that 8-byte alignment does not yet allow a single wider load here.
663define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(<3 x i32> addrspace(4)* inreg %ptr) {
664; GFX9-LABEL: s_load_constant_v3i32_align8:
665; GFX9:       ; %bb.0:
666; GFX9-NEXT:    s_mov_b32 s2, s0
667; GFX9-NEXT:    s_mov_b32 s3, s1
668; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
669; GFX9-NEXT:    s_load_dword s2, s[2:3], 0x8
670; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
671; GFX9-NEXT:    ; return to shader part epilog
672;
673; GFX7-LABEL: s_load_constant_v3i32_align8:
674; GFX7:       ; %bb.0:
675; GFX7-NEXT:    s_mov_b32 s2, s0
676; GFX7-NEXT:    s_mov_b32 s3, s1
677; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
678; GFX7-NEXT:    s_load_dword s2, s[2:3], 0x2
679; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
680; GFX7-NEXT:    ; return to shader part epilog
681  %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 8
682  ret <3 x i32> %load
683}
684
; Loads <6 x i16> (96 bits) from addrspace(4) with align 8 and returns it
; bitcast to <3 x i32>.
; The expected code is the same dwordx2 + dword pair of scalar loads used for
; the plain <3 x i32> case, with no further instructions before the epilog,
; so the bitcast is expected to cost nothing in the generated code.
685define amdgpu_ps <3 x i32> @s_load_constant_v6i16_align8(<6 x i16> addrspace(4)* inreg %ptr) {
686; GFX9-LABEL: s_load_constant_v6i16_align8:
687; GFX9:       ; %bb.0:
688; GFX9-NEXT:    s_mov_b32 s2, s0
689; GFX9-NEXT:    s_mov_b32 s3, s1
690; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
691; GFX9-NEXT:    s_load_dword s2, s[2:3], 0x8
692; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
693; GFX9-NEXT:    ; return to shader part epilog
694;
695; GFX7-LABEL: s_load_constant_v6i16_align8:
696; GFX7:       ; %bb.0:
697; GFX7-NEXT:    s_mov_b32 s2, s0
698; GFX7-NEXT:    s_mov_b32 s3, s1
699; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
700; GFX7-NEXT:    s_load_dword s2, s[2:3], 0x2
701; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
702; GFX7-NEXT:    ; return to shader part epilog
703  %load = load <6 x i16>, <6 x i16> addrspace(4)* %ptr, align 8
704  %cast = bitcast <6 x i16> %load to <3 x i32>
705  ret <3 x i32> %cast
706}
707
; Loads <12 x i8> from addrspace(4) with align 8 and returns the vector
; directly, so each of the twelve bytes ends up in its own SGPR (s0-s11).
; Expected code for both the gfx900 and hawaii runs:
;   - the data is fetched as s_load_dwordx2 into s[12:13] plus s_load_dword
;     into s8 (the third dword is loaded straight into its final register);
;   - bytes 1-3 of every dword are produced with s_lshr_b32 by 8, 16 and 24;
;   - byte 0 of the first two dwords is moved into place with s_mov_b32
;     (s0 from s12, s4 from s13).
; No masking instructions are expected, so the registers holding bytes 0-2 of
; each dword keep the higher bytes in their upper bits.
708define amdgpu_ps <12 x i8> @s_load_constant_v12i8_align8(<12 x i8> addrspace(4)* inreg %ptr) {
709; GFX9-LABEL: s_load_constant_v12i8_align8:
710; GFX9:       ; %bb.0:
711; GFX9-NEXT:    s_load_dwordx2 s[12:13], s[0:1], 0x0
712; GFX9-NEXT:    s_load_dword s8, s[0:1], 0x8
713; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
714; GFX9-NEXT:    s_lshr_b32 s1, s12, 8
715; GFX9-NEXT:    s_lshr_b32 s2, s12, 16
716; GFX9-NEXT:    s_lshr_b32 s3, s12, 24
717; GFX9-NEXT:    s_lshr_b32 s5, s13, 8
718; GFX9-NEXT:    s_lshr_b32 s6, s13, 16
719; GFX9-NEXT:    s_lshr_b32 s7, s13, 24
720; GFX9-NEXT:    s_lshr_b32 s9, s8, 8
721; GFX9-NEXT:    s_lshr_b32 s10, s8, 16
722; GFX9-NEXT:    s_lshr_b32 s11, s8, 24
723; GFX9-NEXT:    s_mov_b32 s0, s12
724; GFX9-NEXT:    s_mov_b32 s4, s13
725; GFX9-NEXT:    ; return to shader part epilog
726;
727; GFX7-LABEL: s_load_constant_v12i8_align8:
728; GFX7:       ; %bb.0:
729; GFX7-NEXT:    s_load_dwordx2 s[12:13], s[0:1], 0x0
730; GFX7-NEXT:    s_load_dword s8, s[0:1], 0x2
731; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
732; GFX7-NEXT:    s_lshr_b32 s1, s12, 8
733; GFX7-NEXT:    s_lshr_b32 s2, s12, 16
734; GFX7-NEXT:    s_lshr_b32 s3, s12, 24
735; GFX7-NEXT:    s_lshr_b32 s5, s13, 8
736; GFX7-NEXT:    s_lshr_b32 s6, s13, 16
737; GFX7-NEXT:    s_lshr_b32 s7, s13, 24
738; GFX7-NEXT:    s_lshr_b32 s9, s8, 8
739; GFX7-NEXT:    s_lshr_b32 s10, s8, 16
740; GFX7-NEXT:    s_lshr_b32 s11, s8, 24
741; GFX7-NEXT:    s_mov_b32 s0, s12
742; GFX7-NEXT:    s_mov_b32 s4, s13
743; GFX7-NEXT:    ; return to shader part epilog
744  %load = load <12 x i8>, <12 x i8> addrspace(4)* %ptr, align 8
745  ret <12 x i8> %load
746}
747
; Uniform load of <3 x i32> from addrspace(4) with the load marked align 16.
; With this alignment a single s_load_dwordx4 into s[0:3] is expected, which
; fetches 128 bits for the 96-bit value, and the pointer in s[0:1] is used
; in place without the copy to s[2:3] seen in the lower-alignment tests.
; The checks use the shared prefix, so the same output is required from every
; llc invocation at the top of the file.
748define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align16(<3 x i32> addrspace(4)* inreg %ptr) {
749; GCN-LABEL: s_load_constant_v3i32_align16:
750; GCN:       ; %bb.0:
751; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
752; GCN-NEXT:    s_waitcnt lgkmcnt(0)
753; GCN-NEXT:    ; return to shader part epilog
754  %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 16
755  ret <3 x i32> %load
756}
757