1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,CGP %s
4
5; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
6
; 64-bit unsigned remainder with both operands passed in VGPRs (num in v[0:1],
; den in v[2:3], result returned in v[0:1]).
;
; The assertions use the shared CHECK prefix, i.e. the expected output is the
; same for both RUN lines (IDIV expansion in AMDGPUCodeGenPrepare disabled or
; enabled).
;
; Shape of the expected code:
;   * %bb.0 ORs the high halves of num and den and compares the result with 0.
;   * %bb.1 runs for lanes where that value is non-zero: full 64-bit sequence.
;   * %bb.3 runs for the remaining lanes: short 32-bit sequence on the low
;     halves only, with the high half of the result set to 0.
7define i64 @v_urem_i64(i64 %num, i64 %den) {
8; CHECK-LABEL: v_urem_i64:
9; CHECK:       ; %bb.0:
10; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; CHECK-NEXT:    v_or_b32_e32 v5, v1, v3
12; CHECK-NEXT:    v_mov_b32_e32 v4, 0
13; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
14; CHECK-NEXT:    ; implicit-def: $vgpr4_vgpr5
15; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
16; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
17; CHECK-NEXT:    s_cbranch_execz BB0_2
18; CHECK-NEXT:  ; %bb.1:
19; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, v2
20; CHECK-NEXT:    v_cvt_f32_u32_e32 v5, v3
21; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v2
22; CHECK-NEXT:    v_subb_u32_e32 v7, vcc, 0, v3, vcc
23; CHECK-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
24; CHECK-NEXT:    v_rcp_iflag_f32_e32 v4, v4
25; CHECK-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
26; CHECK-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
27; CHECK-NEXT:    v_trunc_f32_e32 v5, v5
28; CHECK-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
29; CHECK-NEXT:    v_cvt_u32_f32_e32 v5, v5
30; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
31; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v5
32; CHECK-NEXT:    v_mul_lo_u32 v9, v6, v4
33; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v4
34; CHECK-NEXT:    v_mul_hi_u32 v11, v6, v4
35; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
36; CHECK-NEXT:    v_mul_lo_u32 v10, v5, v9
37; CHECK-NEXT:    v_mul_hi_u32 v12, v4, v9
38; CHECK-NEXT:    v_mul_hi_u32 v9, v5, v9
39; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
40; CHECK-NEXT:    v_mul_lo_u32 v11, v4, v8
41; CHECK-NEXT:    v_mul_lo_u32 v13, v5, v8
42; CHECK-NEXT:    v_mul_hi_u32 v14, v4, v8
43; CHECK-NEXT:    v_mul_hi_u32 v8, v5, v8
44; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
45; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
46; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
47; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
48; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
49; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
50; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
51; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
52; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
53; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v13, v12
54; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
55; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
56; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
57; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
58; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
59; CHECK-NEXT:    v_addc_u32_e64 v9, s[4:5], v5, v8, vcc
60; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v8
61; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v4
62; CHECK-NEXT:    v_mul_lo_u32 v7, v7, v4
63; CHECK-NEXT:    v_mul_hi_u32 v10, v6, v4
64; CHECK-NEXT:    v_mul_lo_u32 v6, v6, v9
65; CHECK-NEXT:    v_mul_lo_u32 v11, v9, v8
66; CHECK-NEXT:    v_mul_hi_u32 v12, v4, v8
67; CHECK-NEXT:    v_mul_hi_u32 v8, v9, v8
68; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
69; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
70; CHECK-NEXT:    v_mul_lo_u32 v7, v4, v6
71; CHECK-NEXT:    v_mul_lo_u32 v10, v9, v6
72; CHECK-NEXT:    v_mul_hi_u32 v13, v4, v6
73; CHECK-NEXT:    v_mul_hi_u32 v6, v9, v6
74; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
75; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
76; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v10, v8
77; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
78; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v12
79; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
80; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v13
81; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
82; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v7
83; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v11
84; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v7
85; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
86; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
87; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
88; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, v5, v6, vcc
89; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
90; CHECK-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
91; CHECK-NEXT:    v_mul_lo_u32 v6, v1, v4
92; CHECK-NEXT:    v_mul_hi_u32 v7, v0, v4
93; CHECK-NEXT:    v_mul_hi_u32 v4, v1, v4
94; CHECK-NEXT:    v_mul_lo_u32 v8, v0, v5
95; CHECK-NEXT:    v_mul_lo_u32 v9, v1, v5
96; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v5
97; CHECK-NEXT:    v_mul_hi_u32 v5, v1, v5
98; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
99; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
100; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
101; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
102; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
103; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
104; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
105; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
106; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
107; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
108; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
109; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
110; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
111; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v4
112; CHECK-NEXT:    v_mul_lo_u32 v8, v3, v4
113; CHECK-NEXT:    v_mul_hi_u32 v4, v2, v4
114; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
115; CHECK-NEXT:    v_mul_lo_u32 v5, v2, v5
116; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
117; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
118; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, v0, v7
119; CHECK-NEXT:    v_subb_u32_e64 v6, s[4:5], v1, v4, vcc
120; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v4
121; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v2
122; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
123; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v3
124; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
125; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
126; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v3
127; CHECK-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
128; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v5, v2
129; CHECK-NEXT:    v_subbrev_u32_e64 v8, s[4:5], 0, v1, vcc
130; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v2
131; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
132; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
133; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v3
134; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
135; CHECK-NEXT:    v_sub_i32_e32 v11, vcc, v7, v2
136; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
137; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v8, v3
138; CHECK-NEXT:    v_cndmask_b32_e32 v3, v10, v9, vcc
139; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
140; CHECK-NEXT:    v_cndmask_b32_e32 v3, v7, v11, vcc
141; CHECK-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
142; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
143; CHECK-NEXT:    v_cndmask_b32_e32 v4, v5, v3, vcc
144; CHECK-NEXT:    v_cndmask_b32_e32 v5, v6, v1, vcc
145; CHECK-NEXT:  BB0_2: ; %Flow
146; CHECK-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
147; CHECK-NEXT:    s_xor_b64 exec, exec, s[4:5]
148; CHECK-NEXT:    s_cbranch_execz BB0_4
149; CHECK-NEXT:  ; %bb.3:
150; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, v2
151; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, 0, v2
152; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, v1
153; CHECK-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
154; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
155; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v1
156; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
157; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
158; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
159; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v2
160; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
161; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v0, v2
162; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
163; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
164; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v0, v2
165; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
166; CHECK-NEXT:    v_cndmask_b32_e32 v4, v0, v1, vcc
167; CHECK-NEXT:    v_mov_b32_e32 v5, 0
168; CHECK-NEXT:  BB0_4:
169; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
170; CHECK-NEXT:    v_mov_b32_e32 v0, v4
171; CHECK-NEXT:    v_mov_b32_e32 v1, v5
172; CHECK-NEXT:    s_setpc_b64 s[30:31]
173  %result = urem i64 %num, %den
174  ret i64 %result
175}
176
177; FIXME: This is a workaround for not handling the uniform VGPR case.
; s_urem_i64 passes each 32-bit half of its remainder through this intrinsic
; before building the value it returns.
178declare i32 @llvm.amdgcn.readfirstlane(i32)
179
; 64-bit unsigned remainder for the amdgpu_ps calling convention with both
; operands marked inreg (num in s[0:1], den in s[2:3] in the expected output).
;
; The assertions use the shared CHECK prefix, i.e. the expected output is the
; same for both RUN lines.
;
; The remainder is split into two i32 halves, each half is passed through
; llvm.amdgcn.readfirstlane, and the halves are reassembled into the returned
; i64. In the expected output the value ends up in s0/s1.
180define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
181; CHECK-LABEL: s_urem_i64:
182; CHECK:       ; %bb.0:
183; CHECK-NEXT:    s_or_b64 s[6:7], s[0:1], s[2:3]
184; CHECK-NEXT:    s_mov_b32 s4, 0
185; CHECK-NEXT:    s_mov_b32 s5, -1
186; CHECK-NEXT:    s_and_b64 s[6:7], s[6:7], s[4:5]
187; CHECK-NEXT:    v_cmp_ne_u64_e64 vcc, s[6:7], 0
188; CHECK-NEXT:    s_cbranch_vccz BB1_2
189; CHECK-NEXT:  ; %bb.1:
190; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, s2
191; CHECK-NEXT:    v_mov_b32_e32 v1, s3
192; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s3
193; CHECK-NEXT:    s_sub_u32 s6, 0, s2
194; CHECK-NEXT:    s_cselect_b32 s4, 1, 0
195; CHECK-NEXT:    v_mov_b32_e32 v3, s1
196; CHECK-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v2
197; CHECK-NEXT:    s_and_b32 s4, s4, 1
198; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
199; CHECK-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
200; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
201; CHECK-NEXT:    s_subb_u32 s7, 0, s3
202; CHECK-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v0
203; CHECK-NEXT:    v_trunc_f32_e32 v2, v2
204; CHECK-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v2
205; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
206; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
207; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v2
208; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v0
209; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v0
210; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v0
211; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
212; CHECK-NEXT:    v_mul_lo_u32 v6, v2, v5
213; CHECK-NEXT:    v_mul_hi_u32 v8, v0, v5
214; CHECK-NEXT:    v_mul_hi_u32 v5, v2, v5
215; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
216; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v4
217; CHECK-NEXT:    v_mul_lo_u32 v9, v2, v4
218; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v4
219; CHECK-NEXT:    v_mul_hi_u32 v4, v2, v4
220; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
221; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
222; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
223; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
224; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
225; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
226; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
227; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
228; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
229; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v8
230; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
231; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
232; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
233; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
234; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v5
235; CHECK-NEXT:    v_addc_u32_e64 v5, s[4:5], v2, v4, vcc
236; CHECK-NEXT:    v_add_i32_e64 v2, s[4:5], v2, v4
237; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v0
238; CHECK-NEXT:    v_mul_lo_u32 v6, s7, v0
239; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v0
240; CHECK-NEXT:    v_mul_lo_u32 v8, s6, v5
241; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v4
242; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v4
243; CHECK-NEXT:    v_mul_hi_u32 v4, v5, v4
244; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
245; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
246; CHECK-NEXT:    v_mul_lo_u32 v7, v0, v6
247; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v6
248; CHECK-NEXT:    v_mul_hi_u32 v11, v0, v6
249; CHECK-NEXT:    v_mul_hi_u32 v5, v5, v6
250; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v9, v7
251; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
252; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v8, v4
253; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
254; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
255; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
256; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v11
257; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
258; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
259; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v9
260; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v6
261; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
262; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
263; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
264; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, v2, v5, vcc
265; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
266; CHECK-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
267; CHECK-NEXT:    v_mul_lo_u32 v4, s1, v0
268; CHECK-NEXT:    v_mul_hi_u32 v5, s0, v0
269; CHECK-NEXT:    v_mul_hi_u32 v0, s1, v0
270; CHECK-NEXT:    v_mul_lo_u32 v6, s0, v2
271; CHECK-NEXT:    v_mul_lo_u32 v7, s1, v2
272; CHECK-NEXT:    v_mul_hi_u32 v8, s0, v2
273; CHECK-NEXT:    v_mul_hi_u32 v2, s1, v2
274; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
275; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
276; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
277; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
278; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
279; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
280; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
281; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
282; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
283; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
284; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
285; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
286; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
287; CHECK-NEXT:    v_mul_lo_u32 v5, s2, v0
288; CHECK-NEXT:    v_mul_lo_u32 v6, s3, v0
289; CHECK-NEXT:    v_mul_hi_u32 v0, s2, v0
290; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
291; CHECK-NEXT:    v_mul_lo_u32 v2, s2, v2
292; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
293; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
294; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, s0, v5
295; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v0, vcc
296; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], s1, v0
297; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s2, v2
298; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
299; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s3, v3
300; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[4:5]
301; CHECK-NEXT:    v_subb_u32_e32 v0, vcc, v0, v1, vcc
302; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v3
303; CHECK-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
304; CHECK-NEXT:    v_subrev_i32_e32 v3, vcc, s2, v2
305; CHECK-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
306; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v3
307; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
308; CHECK-NEXT:    v_subrev_i32_e32 v5, vcc, s2, v3
309; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s3, v0
310; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
311; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v0
312; CHECK-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
313; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
314; CHECK-NEXT:    v_cndmask_b32_e32 v0, v3, v5, vcc
315; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
316; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
317; CHECK-NEXT:    s_mov_b32 s5, 0
318; CHECK-NEXT:    s_branch BB1_3
319; CHECK-NEXT:  BB1_2:
320; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
321; CHECK-NEXT:  BB1_3: ; %Flow
322; CHECK-NEXT:    s_xor_b32 s1, s5, -1
323; CHECK-NEXT:    s_and_b32 s1, s1, 1
324; CHECK-NEXT:    s_cmp_lg_u32 s1, 0
325; CHECK-NEXT:    s_cbranch_scc1 BB1_5
326; CHECK-NEXT:  ; %bb.4:
327; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, s2
328; CHECK-NEXT:    s_sub_i32 s1, 0, s2
329; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
330; CHECK-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
331; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
332; CHECK-NEXT:    v_mul_lo_u32 v1, s1, v0
333; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
334; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
335; CHECK-NEXT:    v_mul_hi_u32 v0, s0, v0
336; CHECK-NEXT:    v_mul_lo_u32 v0, v0, s2
337; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
338; CHECK-NEXT:    v_subrev_i32_e32 v1, vcc, s2, v0
339; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v0
340; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
341; CHECK-NEXT:    v_subrev_i32_e32 v1, vcc, s2, v0
342; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s2, v0
343; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
344; CHECK-NEXT:  BB1_5:
345; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
346; CHECK-NEXT:    s_mov_b32 s1, s0
347; CHECK-NEXT:    ; return to shader part epilog
348  %result = urem i64 %num, %den
349  %cast = bitcast i64 %result to <2 x i32>
350  %elt.0 = extractelement <2 x i32> %cast, i32 0
351  %elt.1 = extractelement <2 x i32> %cast, i32 1
352  %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0)
353  %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1)
354  %ins.0 = insertelement <2 x i32> undef, i32 %res.0, i32 0
; NOTE(review): element 1 below is filled with %res.0, so %res.1 is never used
; and the returned high half is a copy of the low half (this is what the
; `s_mov_b32 s1, s0` assertion above reflects). Presumably %res.1 was intended;
; switching to it requires regenerating the assertions with
; utils/update_llc_test_checks.py. TODO confirm.
355  %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1
356  %cast.back = bitcast <2 x i32> %ins.1 to i64
357  ret i64 %cast.back
358}
359
360define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
361; GISEL-LABEL: v_urem_v2i64:
362; GISEL:       ; %bb.0:
363; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v4
365; GISEL-NEXT:    v_cvt_f32_u32_e32 v9, v5
366; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v9
367; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
368; GISEL-NEXT:    v_mul_f32_e32 v8, 0x5f7ffffc, v8
369; GISEL-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v8
370; GISEL-NEXT:    v_trunc_f32_e32 v9, v9
371; GISEL-NEXT:    v_mac_f32_e32 v8, 0xcf800000, v9
372; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
373; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v9
374; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v4
375; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, 0, v5, vcc
376; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
377; GISEL-NEXT:    v_mul_lo_u32 v13, v11, v8
378; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v9
379; GISEL-NEXT:    v_mul_hi_u32 v15, v10, v8
380; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
381; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
382; GISEL-NEXT:    v_mul_lo_u32 v14, v9, v12
383; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v13
384; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v12
385; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
386; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
387; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
388; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
389; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
390; GISEL-NEXT:    v_mul_lo_u32 v15, v9, v13
391; GISEL-NEXT:    v_mul_hi_u32 v12, v9, v12
392; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v13
393; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
394; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
395; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
396; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
397; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
398; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
399; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
400; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
401; GISEL-NEXT:    v_mul_hi_u32 v13, v9, v13
402; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
403; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
404; GISEL-NEXT:    v_addc_u32_e64 v12, s[4:5], v9, v13, vcc
405; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
406; GISEL-NEXT:    v_mul_lo_u32 v13, v10, v8
407; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v8
408; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v12
409; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v8
410; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
411; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
412; GISEL-NEXT:    v_mul_lo_u32 v11, v12, v13
413; GISEL-NEXT:    v_mul_lo_u32 v14, v8, v10
414; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v13
415; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
416; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
417; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v15
418; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
419; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v14, v11
420; GISEL-NEXT:    v_mul_lo_u32 v14, v12, v10
421; GISEL-NEXT:    v_mul_hi_u32 v13, v12, v13
422; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v10
423; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
424; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
425; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v15
426; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
427; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v15
428; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v11
429; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
430; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
431; GISEL-NEXT:    v_mul_hi_u32 v10, v12, v10
432; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
433; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
434; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v10, vcc
435; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, 0, v9, s[4:5]
436; GISEL-NEXT:    v_mul_lo_u32 v10, v1, v8
437; GISEL-NEXT:    v_mul_lo_u32 v11, v0, v9
438; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v8
439; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
440; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
441; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
442; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
443; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
444; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v9
445; GISEL-NEXT:    v_mul_hi_u32 v8, v1, v8
446; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v9
447; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
448; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
449; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
450; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
451; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
452; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
453; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
454; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
455; GISEL-NEXT:    v_mul_hi_u32 v9, v1, v9
456; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
457; GISEL-NEXT:    v_mul_lo_u32 v10, v4, v8
458; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v8
459; GISEL-NEXT:    v_mul_lo_u32 v9, v4, v9
460; GISEL-NEXT:    v_mul_hi_u32 v8, v4, v8
461; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
462; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
463; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
464; GISEL-NEXT:    v_subb_u32_e64 v9, s[4:5], v1, v8, vcc
465; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v8
466; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v5
467; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
468; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v4
469; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
470; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v5
471; GISEL-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s[4:5]
472; GISEL-NEXT:    v_sub_i32_e64 v10, s[4:5], v0, v4
473; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
474; GISEL-NEXT:    v_subbrev_u32_e64 v11, vcc, 0, v1, s[4:5]
475; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v5
476; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
477; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v4
478; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
479; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v11, v5
480; GISEL-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
481; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v10, v4
482; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], v1, v5, s[4:5]
483; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
484; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
485; GISEL-NEXT:    v_cndmask_b32_e32 v4, v10, v4, vcc
486; GISEL-NEXT:    v_cndmask_b32_e32 v1, v11, v1, vcc
487; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
488; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
489; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
490; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v6
491; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v7
492; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
493; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
494; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
495; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
496; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
497; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
498; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
499; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
500; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, 0, v6
501; GISEL-NEXT:    v_subb_u32_e32 v9, vcc, 0, v7, vcc
502; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v4
503; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v4
504; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v5
505; GISEL-NEXT:    v_mul_hi_u32 v13, v8, v4
506; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
507; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
508; GISEL-NEXT:    v_mul_lo_u32 v12, v5, v10
509; GISEL-NEXT:    v_mul_lo_u32 v13, v4, v11
510; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v10
511; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
512; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
513; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
514; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
515; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
516; GISEL-NEXT:    v_mul_lo_u32 v13, v5, v11
517; GISEL-NEXT:    v_mul_hi_u32 v10, v5, v10
518; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v11
519; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
520; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
521; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
522; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
523; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
524; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
525; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
526; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
527; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v11
528; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
529; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
530; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v5, v11, vcc
531; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v11
532; GISEL-NEXT:    v_mul_lo_u32 v11, v8, v4
533; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v4
534; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v10
535; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v4
536; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
537; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
538; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v11
539; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
540; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v11
541; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
542; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
543; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
544; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
545; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
546; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
547; GISEL-NEXT:    v_mul_hi_u32 v11, v10, v11
548; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v8
549; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
550; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
551; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
552; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
553; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
554; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
555; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
556; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
557; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v8
558; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
559; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v9
560; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
561; GISEL-NEXT:    v_addc_u32_e64 v5, vcc, 0, v5, s[4:5]
562; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
563; GISEL-NEXT:    v_mul_lo_u32 v9, v2, v5
564; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
565; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
566; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
567; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
568; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
569; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
570; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v5
571; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
572; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v5
573; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
574; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
575; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
576; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
577; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
578; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
579; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
580; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
581; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
582; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
583; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v4
584; GISEL-NEXT:    v_mul_lo_u32 v9, v7, v4
585; GISEL-NEXT:    v_mul_lo_u32 v5, v6, v5
586; GISEL-NEXT:    v_mul_hi_u32 v4, v6, v4
587; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
588; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
589; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
590; GISEL-NEXT:    v_subb_u32_e64 v5, s[4:5], v3, v4, vcc
591; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
592; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v7
593; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
594; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v6
595; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
596; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v5, v7
597; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v8, s[4:5]
598; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], v2, v6
599; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
600; GISEL-NEXT:    v_subbrev_u32_e64 v9, vcc, 0, v3, s[4:5]
601; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v7
602; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
603; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v6
604; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
605; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v7
606; GISEL-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
607; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v8, v6
608; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v7, s[4:5]
609; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
610; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
611; GISEL-NEXT:    v_cndmask_b32_e32 v6, v8, v6, vcc
612; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
613; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
614; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
615; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
616; GISEL-NEXT:    s_setpc_b64 s[30:31]
617;
618; CGP-LABEL: v_urem_v2i64:
619; CGP:       ; %bb.0:
620; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
621; CGP-NEXT:    v_mov_b32_e32 v8, v0
622; CGP-NEXT:    v_mov_b32_e32 v9, v1
623; CGP-NEXT:    v_or_b32_e32 v1, v9, v5
624; CGP-NEXT:    v_mov_b32_e32 v0, 0
625; CGP-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
626; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
627; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
628; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
629; CGP-NEXT:    s_cbranch_execz BB2_2
630; CGP-NEXT:  ; %bb.1:
631; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v4
632; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v5
633; CGP-NEXT:    v_sub_i32_e32 v10, vcc, 0, v4
634; CGP-NEXT:    v_subb_u32_e32 v11, vcc, 0, v5, vcc
635; CGP-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
636; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
637; CGP-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
638; CGP-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
639; CGP-NEXT:    v_trunc_f32_e32 v1, v1
640; CGP-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
641; CGP-NEXT:    v_cvt_u32_f32_e32 v1, v1
642; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
643; CGP-NEXT:    v_mul_lo_u32 v12, v10, v1
644; CGP-NEXT:    v_mul_lo_u32 v13, v10, v0
645; CGP-NEXT:    v_mul_lo_u32 v14, v11, v0
646; CGP-NEXT:    v_mul_hi_u32 v15, v10, v0
647; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
648; CGP-NEXT:    v_mul_lo_u32 v14, v1, v13
649; CGP-NEXT:    v_mul_hi_u32 v16, v0, v13
650; CGP-NEXT:    v_mul_hi_u32 v13, v1, v13
651; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
652; CGP-NEXT:    v_mul_lo_u32 v15, v0, v12
653; CGP-NEXT:    v_mul_lo_u32 v17, v1, v12
654; CGP-NEXT:    v_mul_hi_u32 v18, v0, v12
655; CGP-NEXT:    v_mul_hi_u32 v12, v1, v12
656; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
657; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
658; CGP-NEXT:    v_add_i32_e32 v13, vcc, v17, v13
659; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
660; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
661; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
662; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v18
663; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
664; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
665; CGP-NEXT:    v_add_i32_e32 v15, vcc, v17, v16
666; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
667; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
668; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
669; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
670; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
671; CGP-NEXT:    v_addc_u32_e64 v13, s[4:5], v1, v12, vcc
672; CGP-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v12
673; CGP-NEXT:    v_mul_lo_u32 v12, v10, v0
674; CGP-NEXT:    v_mul_lo_u32 v11, v11, v0
675; CGP-NEXT:    v_mul_hi_u32 v14, v10, v0
676; CGP-NEXT:    v_mul_lo_u32 v10, v10, v13
677; CGP-NEXT:    v_mul_lo_u32 v15, v13, v12
678; CGP-NEXT:    v_mul_hi_u32 v16, v0, v12
679; CGP-NEXT:    v_mul_hi_u32 v12, v13, v12
680; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
681; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v14
682; CGP-NEXT:    v_mul_lo_u32 v11, v0, v10
683; CGP-NEXT:    v_mul_lo_u32 v14, v13, v10
684; CGP-NEXT:    v_mul_hi_u32 v17, v0, v10
685; CGP-NEXT:    v_mul_hi_u32 v10, v13, v10
686; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v15, v11
687; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
688; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v14, v12
689; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
690; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
691; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
692; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v17
693; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
694; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v11
695; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v15
696; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
697; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
698; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v12
699; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v12
700; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v10, vcc
701; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v11
702; CGP-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
703; CGP-NEXT:    v_mul_lo_u32 v10, v9, v0
704; CGP-NEXT:    v_mul_hi_u32 v11, v8, v0
705; CGP-NEXT:    v_mul_hi_u32 v0, v9, v0
706; CGP-NEXT:    v_mul_lo_u32 v12, v8, v1
707; CGP-NEXT:    v_mul_lo_u32 v13, v9, v1
708; CGP-NEXT:    v_mul_hi_u32 v14, v8, v1
709; CGP-NEXT:    v_mul_hi_u32 v1, v9, v1
710; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
711; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
712; CGP-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
713; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
714; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
715; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
716; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
717; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
718; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
719; CGP-NEXT:    v_add_i32_e32 v11, vcc, v13, v11
720; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v10
721; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
722; CGP-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
723; CGP-NEXT:    v_mul_lo_u32 v11, v4, v0
724; CGP-NEXT:    v_mul_lo_u32 v12, v5, v0
725; CGP-NEXT:    v_mul_hi_u32 v0, v4, v0
726; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v10
727; CGP-NEXT:    v_mul_lo_u32 v1, v4, v1
728; CGP-NEXT:    v_add_i32_e32 v1, vcc, v12, v1
729; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
730; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v8, v11
731; CGP-NEXT:    v_subb_u32_e64 v10, s[4:5], v9, v0, vcc
732; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v9, v0
733; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v4
734; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
735; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v10, v5
736; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
737; CGP-NEXT:    v_subb_u32_e32 v0, vcc, v0, v5, vcc
738; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v5
739; CGP-NEXT:    v_cndmask_b32_e32 v9, v11, v9, vcc
740; CGP-NEXT:    v_sub_i32_e32 v11, vcc, v1, v4
741; CGP-NEXT:    v_subbrev_u32_e64 v12, s[4:5], 0, v0, vcc
742; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v11, v4
743; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
744; CGP-NEXT:    v_subb_u32_e32 v0, vcc, v0, v5, vcc
745; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v12, v5
746; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, -1, vcc
747; CGP-NEXT:    v_sub_i32_e32 v15, vcc, v11, v4
748; CGP-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
749; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v12, v5
750; CGP-NEXT:    v_cndmask_b32_e32 v5, v14, v13, vcc
751; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
752; CGP-NEXT:    v_cndmask_b32_e32 v5, v11, v15, vcc
753; CGP-NEXT:    v_cndmask_b32_e32 v11, v12, v0, vcc
754; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
755; CGP-NEXT:    v_cndmask_b32_e32 v0, v1, v5, vcc
756; CGP-NEXT:    v_cndmask_b32_e32 v1, v10, v11, vcc
757; CGP-NEXT:  BB2_2: ; %Flow2
758; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
759; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]
760; CGP-NEXT:    s_cbranch_execz BB2_4
761; CGP-NEXT:  ; %bb.3:
762; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v4
763; CGP-NEXT:    v_sub_i32_e32 v1, vcc, 0, v4
764; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
765; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
766; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
767; CGP-NEXT:    v_mul_lo_u32 v1, v1, v0
768; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
769; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
770; CGP-NEXT:    v_mul_hi_u32 v0, v8, v0
771; CGP-NEXT:    v_mul_lo_u32 v0, v0, v4
772; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v8, v0
773; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v0, v4
774; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
775; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
776; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v0, v4
777; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
778; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
779; CGP-NEXT:    v_mov_b32_e32 v1, 0
780; CGP-NEXT:  BB2_4:
781; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
782; CGP-NEXT:    v_or_b32_e32 v5, v3, v7
783; CGP-NEXT:    v_mov_b32_e32 v4, 0
784; CGP-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
785; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
786; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
787; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
788; CGP-NEXT:    s_cbranch_execz BB2_6
789; CGP-NEXT:  ; %bb.5:
790; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v6
791; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v7
792; CGP-NEXT:    v_sub_i32_e32 v8, vcc, 0, v6
793; CGP-NEXT:    v_subb_u32_e32 v9, vcc, 0, v7, vcc
794; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
795; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
796; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
797; CGP-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
798; CGP-NEXT:    v_trunc_f32_e32 v5, v5
799; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
800; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
801; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
802; CGP-NEXT:    v_mul_lo_u32 v10, v8, v5
803; CGP-NEXT:    v_mul_lo_u32 v11, v8, v4
804; CGP-NEXT:    v_mul_lo_u32 v12, v9, v4
805; CGP-NEXT:    v_mul_hi_u32 v13, v8, v4
806; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
807; CGP-NEXT:    v_mul_lo_u32 v12, v5, v11
808; CGP-NEXT:    v_mul_hi_u32 v14, v4, v11
809; CGP-NEXT:    v_mul_hi_u32 v11, v5, v11
810; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
811; CGP-NEXT:    v_mul_lo_u32 v13, v4, v10
812; CGP-NEXT:    v_mul_lo_u32 v15, v5, v10
813; CGP-NEXT:    v_mul_hi_u32 v16, v4, v10
814; CGP-NEXT:    v_mul_hi_u32 v10, v5, v10
815; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
816; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
817; CGP-NEXT:    v_add_i32_e32 v11, vcc, v15, v11
818; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
819; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
820; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
821; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v16
822; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
823; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
824; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v14
825; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
826; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
827; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
828; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
829; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
830; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], v5, v10, vcc
831; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v10
832; CGP-NEXT:    v_mul_lo_u32 v10, v8, v4
833; CGP-NEXT:    v_mul_lo_u32 v9, v9, v4
834; CGP-NEXT:    v_mul_hi_u32 v12, v8, v4
835; CGP-NEXT:    v_mul_lo_u32 v8, v8, v11
836; CGP-NEXT:    v_mul_lo_u32 v13, v11, v10
837; CGP-NEXT:    v_mul_hi_u32 v14, v4, v10
838; CGP-NEXT:    v_mul_hi_u32 v10, v11, v10
839; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
840; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v12
841; CGP-NEXT:    v_mul_lo_u32 v9, v4, v8
842; CGP-NEXT:    v_mul_lo_u32 v12, v11, v8
843; CGP-NEXT:    v_mul_hi_u32 v15, v4, v8
844; CGP-NEXT:    v_mul_hi_u32 v8, v11, v8
845; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v13, v9
846; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
847; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v12, v10
848; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
849; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v14
850; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
851; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v15
852; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
853; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
854; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v13
855; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v9
856; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
857; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
858; CGP-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v10
859; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
860; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v9
861; CGP-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
862; CGP-NEXT:    v_mul_lo_u32 v8, v3, v4
863; CGP-NEXT:    v_mul_hi_u32 v9, v2, v4
864; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
865; CGP-NEXT:    v_mul_lo_u32 v10, v2, v5
866; CGP-NEXT:    v_mul_lo_u32 v11, v3, v5
867; CGP-NEXT:    v_mul_hi_u32 v12, v2, v5
868; CGP-NEXT:    v_mul_hi_u32 v5, v3, v5
869; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
870; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
871; CGP-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
872; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
873; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
874; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
875; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
876; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
877; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
878; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
879; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
880; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
881; CGP-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
882; CGP-NEXT:    v_mul_lo_u32 v9, v6, v4
883; CGP-NEXT:    v_mul_lo_u32 v10, v7, v4
884; CGP-NEXT:    v_mul_hi_u32 v4, v6, v4
885; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
886; CGP-NEXT:    v_mul_lo_u32 v5, v6, v5
887; CGP-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
888; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
889; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v2, v9
890; CGP-NEXT:    v_subb_u32_e64 v8, s[4:5], v3, v4, vcc
891; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
892; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v6
893; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
894; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v7
895; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
896; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
897; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v8, v7
898; CGP-NEXT:    v_cndmask_b32_e32 v4, v9, v4, vcc
899; CGP-NEXT:    v_sub_i32_e32 v9, vcc, v5, v6
900; CGP-NEXT:    v_subbrev_u32_e64 v10, s[4:5], 0, v3, vcc
901; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v6
902; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
903; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
904; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v7
905; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
906; CGP-NEXT:    v_sub_i32_e32 v13, vcc, v9, v6
907; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
908; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v7
909; CGP-NEXT:    v_cndmask_b32_e32 v7, v12, v11, vcc
910; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
911; CGP-NEXT:    v_cndmask_b32_e32 v7, v9, v13, vcc
912; CGP-NEXT:    v_cndmask_b32_e32 v3, v10, v3, vcc
913; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
914; CGP-NEXT:    v_cndmask_b32_e32 v4, v5, v7, vcc
915; CGP-NEXT:    v_cndmask_b32_e32 v5, v8, v3, vcc
916; CGP-NEXT:  BB2_6: ; %Flow
917; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
918; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]
919; CGP-NEXT:    s_cbranch_execz BB2_8
920; CGP-NEXT:  ; %bb.7:
921; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v6
922; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v6
923; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
924; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
925; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
926; CGP-NEXT:    v_mul_lo_u32 v4, v4, v3
927; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
928; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
929; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
930; CGP-NEXT:    v_mul_lo_u32 v3, v3, v6
931; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
932; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v2, v6
933; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
934; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
935; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v2, v6
936; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
937; CGP-NEXT:    v_cndmask_b32_e32 v4, v2, v3, vcc
938; CGP-NEXT:    v_mov_b32_e32 v5, 0
939; CGP-NEXT:  BB2_8:
940; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
941; CGP-NEXT:    v_mov_b32_e32 v2, v4
942; CGP-NEXT:    v_mov_b32_e32 v3, v5
943; CGP-NEXT:    s_setpc_b64 s[30:31]
944  %result = urem <2 x i64> %num, %den
945  ret <2 x i64> %result
946}
947
; Scalar i64 unsigned remainder by the constant 4096 (a power of two).
; The assertions below use the prefix shared by both RUN configurations, so the
; two pipelines are expected to emit identical code for this function.
; As written, they pin the generic 64-bit expansion: a float reciprocal
; estimate (v_rcp_iflag_f32), integer refinement via mul_lo/mul_hi, and two
; conditional subtractions of the divisor -- not a single mask of the low bits.
; NOTE(review): presumably a missed power-of-two fold in this pipeline; confirm
; before treating the expanded sequence as the intended final codegen.
948define i64 @v_urem_i64_pow2k_denom(i64 %num) {
949; CHECK-LABEL: v_urem_i64_pow2k_denom:
950; CHECK:       ; %bb.0:
951; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
952; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0x1000
953; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v3, 0
954; CHECK-NEXT:    s_movk_i32 s6, 0xf000
955; CHECK-NEXT:    s_movk_i32 s7, 0x1000
956; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
957; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
958; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
959; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
960; CHECK-NEXT:    v_trunc_f32_e32 v3, v3
961; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
962; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
963; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
964; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v3
965; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v2
966; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
967; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v2
968; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
969; CHECK-NEXT:    v_mul_lo_u32 v6, v3, v5
970; CHECK-NEXT:    v_mul_hi_u32 v8, v2, v5
971; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v5
972; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
973; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v4
974; CHECK-NEXT:    v_mul_lo_u32 v9, v3, v4
975; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v4
976; CHECK-NEXT:    v_mul_hi_u32 v4, v3, v4
977; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
978; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
979; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
980; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
981; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
982; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
983; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
984; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
985; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
986; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v8
987; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
988; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
989; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
990; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
991; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
992; CHECK-NEXT:    v_addc_u32_e64 v5, s[4:5], v3, v4, vcc
993; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v4
994; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v2
995; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
996; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v2
997; CHECK-NEXT:    v_mul_lo_u32 v8, s6, v5
998; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v4
999; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v4
1000; CHECK-NEXT:    v_mul_hi_u32 v4, v5, v4
1001; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
1002; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
1003; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v6
1004; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v6
1005; CHECK-NEXT:    v_mul_hi_u32 v11, v2, v6
1006; CHECK-NEXT:    v_mul_hi_u32 v5, v5, v6
1007; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v9, v7
1008; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
1009; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v8, v4
1010; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
1011; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
1012; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
1013; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v11
1014; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
1015; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
1016; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v9
1017; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v6
1018; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
1019; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
1020; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
1021; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
1022; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
1023; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
1024; CHECK-NEXT:    v_mul_lo_u32 v4, v1, v2
1025; CHECK-NEXT:    v_mul_hi_u32 v5, v0, v2
1026; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
1027; CHECK-NEXT:    v_mul_lo_u32 v6, v0, v3
1028; CHECK-NEXT:    v_mul_lo_u32 v7, v1, v3
1029; CHECK-NEXT:    v_mul_hi_u32 v8, v0, v3
1030; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
1031; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
1032; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1033; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
1034; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1035; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
1036; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1037; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
1038; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
1039; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
1040; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
1041; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
1042; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1043; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
1044; CHECK-NEXT:    v_mul_lo_u32 v5, s7, v2
1045; CHECK-NEXT:    v_mul_lo_u32 v6, 0, v2
1046; CHECK-NEXT:    v_mul_hi_u32 v2, s7, v2
1047; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
1048; CHECK-NEXT:    v_mul_lo_u32 v3, s7, v3
1049; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
1050; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
1051; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
1052; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v1, v2, vcc
1053; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v2
1054; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v0
1055; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
1056; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v3
1057; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
1058; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1059; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
1060; CHECK-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
1061; CHECK-NEXT:    v_subrev_i32_e32 v4, vcc, s7, v0
1062; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1063; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s7, v4
1064; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
1065; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
1066; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
1067; CHECK-NEXT:    v_subrev_i32_e32 v7, vcc, s7, v4
1068; CHECK-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
1069; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
1070; CHECK-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
1071; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
1072; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
1073; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
1074; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
1075; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1076; CHECK-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
1077; CHECK-NEXT:    s_setpc_b64 s[30:31]
1078  %result = urem i64 %num, 4096
1079  ret i64 %result
1080}
1081
1082define <2 x i64> @v_urem_v2i64_pow2k_denom(<2 x i64> %num) {
1083; GISEL-LABEL: v_urem_v2i64_pow2k_denom:
1084; GISEL:       ; %bb.0:
1085; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1086; GISEL-NEXT:    s_movk_i32 s10, 0x1000
1087; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s10
1088; GISEL-NEXT:    s_sub_u32 s8, 0, s10
1089; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
1090; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
1091; GISEL-NEXT:    v_mov_b32_e32 v6, v4
1092; GISEL-NEXT:    s_and_b32 s4, s4, 1
1093; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
1094; GISEL-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
1095; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
1096; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v6
1097; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
1098; GISEL-NEXT:    s_subb_u32 s9, 0, 0
1099; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
1100; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
1101; GISEL-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
1102; GISEL-NEXT:    s_sub_u32 s11, 0, s10
1103; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
1104; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
1105; GISEL-NEXT:    v_trunc_f32_e32 v6, v6
1106; GISEL-NEXT:    s_and_b32 s4, s4, 1
1107; GISEL-NEXT:    v_trunc_f32_e32 v7, v7
1108; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
1109; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
1110; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
1111; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
1112; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
1113; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
1114; GISEL-NEXT:    s_subb_u32 s6, 0, 0
1115; GISEL-NEXT:    v_mul_lo_u32 v8, s11, v6
1116; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
1117; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v7
1118; GISEL-NEXT:    v_mul_lo_u32 v10, s11, v4
1119; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v4
1120; GISEL-NEXT:    v_mul_hi_u32 v12, s11, v4
1121; GISEL-NEXT:    v_mul_lo_u32 v13, s8, v5
1122; GISEL-NEXT:    v_mul_lo_u32 v14, s9, v5
1123; GISEL-NEXT:    v_mul_hi_u32 v15, s8, v5
1124; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
1125; GISEL-NEXT:    v_mul_lo_u32 v11, v6, v10
1126; GISEL-NEXT:    v_mul_hi_u32 v16, v4, v10
1127; GISEL-NEXT:    v_mul_hi_u32 v10, v6, v10
1128; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
1129; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v13
1130; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v13
1131; GISEL-NEXT:    v_mul_hi_u32 v13, v7, v13
1132; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
1133; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
1134; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
1135; GISEL-NEXT:    v_mul_lo_u32 v15, v6, v8
1136; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v8
1137; GISEL-NEXT:    v_mul_hi_u32 v8, v6, v8
1138; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v9
1139; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
1140; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
1141; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
1142; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v9
1143; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v9
1144; GISEL-NEXT:    v_mul_hi_u32 v9, v7, v9
1145; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
1146; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
1147; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
1148; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
1149; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
1150; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
1151; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
1152; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
1153; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
1154; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
1155; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
1156; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
1157; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
1158; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1159; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
1160; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
1161; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
1162; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
1163; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1164; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
1165; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1166; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1167; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
1168; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
1169; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
1170; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
1171; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
1172; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
1173; GISEL-NEXT:    v_mul_lo_u32 v8, s11, v4
1174; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v4
1175; GISEL-NEXT:    v_mul_hi_u32 v12, s11, v4
1176; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
1177; GISEL-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
1178; GISEL-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v9
1179; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v5
1180; GISEL-NEXT:    v_mul_lo_u32 v14, s9, v5
1181; GISEL-NEXT:    v_mul_hi_u32 v15, s8, v5
1182; GISEL-NEXT:    v_mul_lo_u32 v16, s11, v10
1183; GISEL-NEXT:    v_mul_lo_u32 v17, v10, v8
1184; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v8
1185; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v8
1186; GISEL-NEXT:    v_mul_lo_u32 v19, s8, v13
1187; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
1188; GISEL-NEXT:    v_mul_lo_u32 v16, v13, v9
1189; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], v14, v19
1190; GISEL-NEXT:    v_mul_hi_u32 v19, v5, v9
1191; GISEL-NEXT:    v_mul_hi_u32 v9, v13, v9
1192; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
1193; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v14, v15
1194; GISEL-NEXT:    v_mul_lo_u32 v14, v4, v11
1195; GISEL-NEXT:    v_mul_lo_u32 v15, v5, v12
1196; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v15
1197; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
1198; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v19
1199; GISEL-NEXT:    v_mul_lo_u32 v15, v10, v11
1200; GISEL-NEXT:    v_mul_hi_u32 v19, v4, v11
1201; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v11
1202; GISEL-NEXT:    v_mul_lo_u32 v11, v13, v12
1203; GISEL-NEXT:    v_mul_hi_u32 v13, v13, v12
1204; GISEL-NEXT:    v_mul_hi_u32 v12, v5, v12
1205; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v17, v14
1206; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
1207; GISEL-NEXT:    v_add_i32_e64 v8, s[8:9], v15, v8
1208; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
1209; GISEL-NEXT:    v_add_i32_e64 v9, s[8:9], v11, v9
1210; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[8:9]
1211; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v18
1212; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[8:9]
1213; GISEL-NEXT:    v_add_i32_e64 v8, s[8:9], v8, v19
1214; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
1215; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
1216; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
1217; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
1218; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], v17, v14
1219; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v18
1220; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v19
1221; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
1222; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v14
1223; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
1224; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v16
1225; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[6:7]
1226; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v12
1227; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v14
1228; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
1229; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v13, v11
1230; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
1231; GISEL-NEXT:    v_addc_u32_e64 v7, vcc, v7, v11, s[4:5]
1232; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
1233; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
1234; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
1235; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
1236; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
1237; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
1238; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
1239; GISEL-NEXT:    v_mul_lo_u32 v9, v1, v5
1240; GISEL-NEXT:    v_mul_hi_u32 v11, v0, v5
1241; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
1242; GISEL-NEXT:    v_mul_lo_u32 v12, v2, v6
1243; GISEL-NEXT:    v_mul_lo_u32 v13, v3, v6
1244; GISEL-NEXT:    v_mul_hi_u32 v14, v2, v6
1245; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v6
1246; GISEL-NEXT:    v_mul_lo_u32 v15, v0, v7
1247; GISEL-NEXT:    v_mul_lo_u32 v16, v1, v7
1248; GISEL-NEXT:    v_mul_hi_u32 v17, v0, v7
1249; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
1250; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
1251; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1252; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
1253; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1254; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
1255; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1256; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
1257; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
1258; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
1259; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1260; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
1261; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1262; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
1263; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1264; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
1265; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1266; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
1267; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
1268; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
1269; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
1270; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
1271; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1272; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
1273; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1274; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
1275; GISEL-NEXT:    v_mul_lo_u32 v10, s10, v4
1276; GISEL-NEXT:    v_mul_lo_u32 v12, 0, v4
1277; GISEL-NEXT:    v_mul_hi_u32 v4, s10, v4
1278; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
1279; GISEL-NEXT:    v_mul_lo_u32 v11, s10, v5
1280; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v5
1281; GISEL-NEXT:    v_mul_hi_u32 v5, s10, v5
1282; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
1283; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
1284; GISEL-NEXT:    v_mul_lo_u32 v6, s10, v6
1285; GISEL-NEXT:    v_mul_lo_u32 v7, s10, v7
1286; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
1287; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
1288; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
1289; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
1290; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
1291; GISEL-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
1292; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
1293; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s10, v2
1294; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
1295; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
1296; GISEL-NEXT:    v_subb_u32_e64 v7, s[6:7], v1, v5, s[4:5]
1297; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v5
1298; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], s10, v0
1299; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[6:7]
1300; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], 0, v6
1301; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[6:7]
1302; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1303; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v7
1304; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
1305; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
1306; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v6
1307; GISEL-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
1308; GISEL-NEXT:    v_subrev_i32_e32 v8, vcc, s10, v2
1309; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1310; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s10, v8
1311; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
1312; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
1313; GISEL-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc
1314; GISEL-NEXT:    v_subrev_i32_e32 v9, vcc, s10, v0
1315; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1316; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s10, v9
1317; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
1318; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
1319; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
1320; GISEL-NEXT:    v_subrev_i32_e32 v13, vcc, s10, v8
1321; GISEL-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v3, vcc
1322; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
1323; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
1324; GISEL-NEXT:    v_subrev_i32_e32 v16, vcc, s10, v9
1325; GISEL-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v1, vcc
1326; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
1327; GISEL-NEXT:    v_cndmask_b32_e32 v10, v12, v10, vcc
1328; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
1329; GISEL-NEXT:    v_cndmask_b32_e32 v11, v15, v11, vcc
1330; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
1331; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v13, vcc
1332; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v11
1333; GISEL-NEXT:    v_cndmask_b32_e64 v9, v9, v16, s[4:5]
1334; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v14, vcc
1335; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
1336; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
1337; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
1338; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
1339; GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v9, s[4:5]
1340; GISEL-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
1341; GISEL-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
1342; GISEL-NEXT:    s_setpc_b64 s[30:31]
1343;
1344; CGP-LABEL: v_urem_v2i64_pow2k_denom:
1345; CGP:       ; %bb.0:
1346; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1347; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0x1000
1348; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
1349; CGP-NEXT:    s_movk_i32 s8, 0xf000
1350; CGP-NEXT:    s_movk_i32 s10, 0x1000
1351; CGP-NEXT:    v_mov_b32_e32 v6, v4
1352; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
1353; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
1354; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
1355; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v6
1356; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
1357; CGP-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
1358; CGP-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
1359; CGP-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
1360; CGP-NEXT:    v_trunc_f32_e32 v6, v6
1361; CGP-NEXT:    v_trunc_f32_e32 v7, v7
1362; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
1363; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
1364; CGP-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
1365; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
1366; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
1367; CGP-NEXT:    v_mul_lo_u32 v8, s8, v6
1368; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
1369; CGP-NEXT:    v_mul_lo_u32 v9, s8, v7
1370; CGP-NEXT:    v_mul_lo_u32 v10, s8, v4
1371; CGP-NEXT:    v_mul_lo_u32 v11, -1, v4
1372; CGP-NEXT:    v_mul_hi_u32 v12, s8, v4
1373; CGP-NEXT:    v_mul_lo_u32 v13, s8, v5
1374; CGP-NEXT:    v_mul_lo_u32 v14, -1, v5
1375; CGP-NEXT:    v_mul_hi_u32 v15, s8, v5
1376; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
1377; CGP-NEXT:    v_mul_lo_u32 v11, v6, v10
1378; CGP-NEXT:    v_mul_hi_u32 v16, v4, v10
1379; CGP-NEXT:    v_mul_hi_u32 v10, v6, v10
1380; CGP-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
1381; CGP-NEXT:    v_mul_lo_u32 v14, v7, v13
1382; CGP-NEXT:    v_mul_hi_u32 v17, v5, v13
1383; CGP-NEXT:    v_mul_hi_u32 v13, v7, v13
1384; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
1385; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
1386; CGP-NEXT:    v_mul_lo_u32 v12, v4, v8
1387; CGP-NEXT:    v_mul_lo_u32 v15, v6, v8
1388; CGP-NEXT:    v_mul_hi_u32 v18, v4, v8
1389; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
1390; CGP-NEXT:    v_mul_lo_u32 v19, v5, v9
1391; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
1392; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
1393; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
1394; CGP-NEXT:    v_mul_lo_u32 v14, v7, v9
1395; CGP-NEXT:    v_mul_hi_u32 v17, v5, v9
1396; CGP-NEXT:    v_mul_hi_u32 v9, v7, v9
1397; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
1398; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
1399; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
1400; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
1401; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
1402; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
1403; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
1404; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
1405; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
1406; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
1407; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
1408; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
1409; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
1410; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1411; CGP-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
1412; CGP-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
1413; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
1414; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
1415; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1416; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
1417; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1418; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1419; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
1420; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
1421; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
1422; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
1423; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
1424; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
1425; CGP-NEXT:    v_mul_lo_u32 v8, s8, v4
1426; CGP-NEXT:    v_mul_lo_u32 v11, -1, v4
1427; CGP-NEXT:    v_mul_hi_u32 v12, s8, v4
1428; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
1429; CGP-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
1430; CGP-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v9
1431; CGP-NEXT:    v_mul_lo_u32 v9, s8, v5
1432; CGP-NEXT:    v_mul_lo_u32 v14, -1, v5
1433; CGP-NEXT:    v_mul_hi_u32 v15, s8, v5
1434; CGP-NEXT:    v_mul_lo_u32 v16, s8, v10
1435; CGP-NEXT:    v_mul_lo_u32 v17, v10, v8
1436; CGP-NEXT:    v_mul_hi_u32 v18, v4, v8
1437; CGP-NEXT:    v_mul_hi_u32 v8, v10, v8
1438; CGP-NEXT:    v_mul_lo_u32 v19, s8, v13
1439; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
1440; CGP-NEXT:    v_mul_lo_u32 v16, v13, v9
1441; CGP-NEXT:    v_add_i32_e64 v14, s[6:7], v14, v19
1442; CGP-NEXT:    v_mul_hi_u32 v19, v5, v9
1443; CGP-NEXT:    v_mul_hi_u32 v9, v13, v9
1444; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
1445; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v14, v15
1446; CGP-NEXT:    v_mul_lo_u32 v14, v4, v11
1447; CGP-NEXT:    v_mul_lo_u32 v15, v5, v12
1448; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v15
1449; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
1450; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v19
1451; CGP-NEXT:    v_mul_lo_u32 v15, v10, v11
1452; CGP-NEXT:    v_mul_hi_u32 v19, v4, v11
1453; CGP-NEXT:    v_mul_hi_u32 v10, v10, v11
1454; CGP-NEXT:    v_mul_lo_u32 v11, v13, v12
1455; CGP-NEXT:    v_mul_hi_u32 v13, v13, v12
1456; CGP-NEXT:    v_mul_hi_u32 v12, v5, v12
1457; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v17, v14
1458; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
1459; CGP-NEXT:    v_add_i32_e64 v8, s[8:9], v15, v8
1460; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
1461; CGP-NEXT:    v_add_i32_e64 v9, s[8:9], v11, v9
1462; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[8:9]
1463; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v18
1464; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[8:9]
1465; CGP-NEXT:    v_add_i32_e64 v8, s[8:9], v8, v19
1466; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
1467; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
1468; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
1469; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
1470; CGP-NEXT:    v_add_i32_e64 v14, s[6:7], v17, v14
1471; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v18
1472; CGP-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v19
1473; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
1474; CGP-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v14
1475; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
1476; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v16
1477; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[6:7]
1478; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v12
1479; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v14
1480; CGP-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
1481; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v13, v11
1482; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
1483; CGP-NEXT:    v_addc_u32_e64 v7, vcc, v7, v11, s[4:5]
1484; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
1485; CGP-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
1486; CGP-NEXT:    v_mul_lo_u32 v8, v3, v4
1487; CGP-NEXT:    v_mul_hi_u32 v10, v2, v4
1488; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
1489; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
1490; CGP-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
1491; CGP-NEXT:    v_mul_lo_u32 v9, v1, v5
1492; CGP-NEXT:    v_mul_hi_u32 v11, v0, v5
1493; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
1494; CGP-NEXT:    v_mul_lo_u32 v12, v2, v6
1495; CGP-NEXT:    v_mul_lo_u32 v13, v3, v6
1496; CGP-NEXT:    v_mul_hi_u32 v14, v2, v6
1497; CGP-NEXT:    v_mul_hi_u32 v6, v3, v6
1498; CGP-NEXT:    v_mul_lo_u32 v15, v0, v7
1499; CGP-NEXT:    v_mul_lo_u32 v16, v1, v7
1500; CGP-NEXT:    v_mul_hi_u32 v17, v0, v7
1501; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
1502; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
1503; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1504; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
1505; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1506; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
1507; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1508; CGP-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
1509; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
1510; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
1511; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1512; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
1513; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1514; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
1515; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1516; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
1517; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1518; CGP-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
1519; CGP-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
1520; CGP-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
1521; CGP-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
1522; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
1523; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1524; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
1525; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1526; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
1527; CGP-NEXT:    v_mul_lo_u32 v10, s10, v4
1528; CGP-NEXT:    v_mul_lo_u32 v12, 0, v4
1529; CGP-NEXT:    v_mul_hi_u32 v4, s10, v4
1530; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
1531; CGP-NEXT:    v_mul_lo_u32 v11, s10, v5
1532; CGP-NEXT:    v_mul_lo_u32 v13, 0, v5
1533; CGP-NEXT:    v_mul_hi_u32 v5, s10, v5
1534; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
1535; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
1536; CGP-NEXT:    v_mul_lo_u32 v6, s10, v6
1537; CGP-NEXT:    v_mul_lo_u32 v7, s10, v7
1538; CGP-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
1539; CGP-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
1540; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
1541; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
1542; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
1543; CGP-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
1544; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
1545; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s10, v2
1546; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
1547; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
1548; CGP-NEXT:    v_subb_u32_e64 v7, s[6:7], v1, v5, s[4:5]
1549; CGP-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v5
1550; CGP-NEXT:    v_cmp_le_u32_e64 s[6:7], s10, v0
1551; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[6:7]
1552; CGP-NEXT:    v_cmp_le_u32_e64 s[6:7], 0, v6
1553; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[6:7]
1554; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1555; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v7
1556; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
1557; CGP-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
1558; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v6
1559; CGP-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
1560; CGP-NEXT:    v_subrev_i32_e32 v8, vcc, s10, v2
1561; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1562; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s10, v8
1563; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
1564; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
1565; CGP-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc
1566; CGP-NEXT:    v_subrev_i32_e32 v9, vcc, s10, v0
1567; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1568; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s10, v9
1569; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
1570; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
1571; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
1572; CGP-NEXT:    v_subrev_i32_e32 v13, vcc, s10, v8
1573; CGP-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v3, vcc
1574; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
1575; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
1576; CGP-NEXT:    v_subrev_i32_e32 v16, vcc, s10, v9
1577; CGP-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v1, vcc
1578; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
1579; CGP-NEXT:    v_cndmask_b32_e32 v10, v12, v10, vcc
1580; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
1581; CGP-NEXT:    v_cndmask_b32_e32 v11, v15, v11, vcc
1582; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
1583; CGP-NEXT:    v_cndmask_b32_e32 v8, v8, v13, vcc
1584; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v11
1585; CGP-NEXT:    v_cndmask_b32_e64 v9, v9, v16, s[4:5]
1586; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v14, vcc
1587; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
1588; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
1589; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
1590; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
1591; CGP-NEXT:    v_cndmask_b32_e64 v0, v0, v9, s[4:5]
1592; CGP-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
1593; CGP-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
1594; CGP-NEXT:    s_setpc_b64 s[30:31]
1595  %result = urem <2 x i64> %num, <i64 4096, i64 4096>
1596  ret <2 x i64> %result
1597}
1598
; Scalar i64 unsigned remainder by the odd constant 1235195 (0x12d8fb).
;
; The assertions below use the prefix shared by both RUN lines, so the
; legalizer expansion and the AMDGPUCodeGenPrepare expansion are expected to
; emit identical code for this function. They are autogenerated; regenerate
; them with utils/update_llc_test_checks.py rather than editing by hand.
;
; In the expected output, s7 holds the denominator (0x12d8fb) and s6 holds the
; low 32 bits of its 64-bit negation (0xffed2705); the high half of the
; negation appears as the inline constant -1 in the v_mul_lo_u32 operands.
1599define i64 @v_urem_i64_oddk_denom(i64 %num) {
1600; CHECK-LABEL: v_urem_i64_oddk_denom:
1601; CHECK:       ; %bb.0:
1602; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1603; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0x12d8fb
1604; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v3, 0
1605; CHECK-NEXT:    s_mov_b32 s6, 0xffed2705
1606; CHECK-NEXT:    s_mov_b32 s7, 0x12d8fb
1607; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
1608; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
1609; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
1610; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
1611; CHECK-NEXT:    v_trunc_f32_e32 v3, v3
1612; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
1613; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
1614; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
1615; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v3
1616; CHECK-NEXT:    v_mul_lo_u32 v5, s6, v2
1617; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
1618; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v2
1619; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
1620; CHECK-NEXT:    v_mul_lo_u32 v6, v3, v5
1621; CHECK-NEXT:    v_mul_hi_u32 v8, v2, v5
1622; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v5
1623; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
1624; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v4
1625; CHECK-NEXT:    v_mul_lo_u32 v9, v3, v4
1626; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v4
1627; CHECK-NEXT:    v_mul_hi_u32 v4, v3, v4
1628; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
1629; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1630; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
1631; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1632; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
1633; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1634; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
1635; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1636; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
1637; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v8
1638; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
1639; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1640; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
1641; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
1642; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
1643; CHECK-NEXT:    v_addc_u32_e64 v5, s[4:5], v3, v4, vcc
1644; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v4
1645; CHECK-NEXT:    v_mul_lo_u32 v4, s6, v2
1646; CHECK-NEXT:    v_mul_lo_u32 v6, -1, v2
1647; CHECK-NEXT:    v_mul_hi_u32 v7, s6, v2
1648; CHECK-NEXT:    v_mul_lo_u32 v8, s6, v5
1649; CHECK-NEXT:    v_mul_lo_u32 v9, v5, v4
1650; CHECK-NEXT:    v_mul_hi_u32 v10, v2, v4
1651; CHECK-NEXT:    v_mul_hi_u32 v4, v5, v4
1652; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
1653; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v7
1654; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v6
1655; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v6
1656; CHECK-NEXT:    v_mul_hi_u32 v11, v2, v6
1657; CHECK-NEXT:    v_mul_hi_u32 v5, v5, v6
1658; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v9, v7
1659; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
1660; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v8, v4
1661; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
1662; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
1663; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
1664; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v11
1665; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
1666; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
1667; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v9
1668; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v6
1669; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
1670; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
1671; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v6
1672; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
1673; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
1674; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
1675; CHECK-NEXT:    v_mul_lo_u32 v4, v1, v2
1676; CHECK-NEXT:    v_mul_hi_u32 v5, v0, v2
1677; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
1678; CHECK-NEXT:    v_mul_lo_u32 v6, v0, v3
1679; CHECK-NEXT:    v_mul_lo_u32 v7, v1, v3
1680; CHECK-NEXT:    v_mul_hi_u32 v8, v0, v3
1681; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
1682; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
1683; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
1684; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v7, v2
1685; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
1686; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
1687; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1688; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v8
1689; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
1690; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
1691; CHECK-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
1692; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
1693; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
1694; CHECK-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
1695; CHECK-NEXT:    v_mul_lo_u32 v5, s7, v2
1696; CHECK-NEXT:    v_mul_lo_u32 v6, 0, v2
1697; CHECK-NEXT:    v_mul_hi_u32 v2, s7, v2
1698; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
1699; CHECK-NEXT:    v_mul_lo_u32 v3, s7, v3
1700; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
1701; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
1702; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
1703; CHECK-NEXT:    v_subb_u32_e64 v3, s[4:5], v1, v2, vcc
1704; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v2
1705; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s7, v0
1706; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
1707; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], 0, v3
1708; CHECK-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
1709; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1710; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
1711; CHECK-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
1712; CHECK-NEXT:    v_subrev_i32_e32 v4, vcc, s7, v0
1713; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1714; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s7, v4
1715; CHECK-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
1716; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
1717; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
1718; CHECK-NEXT:    v_subrev_i32_e32 v7, vcc, s7, v4
1719; CHECK-NEXT:    v_subbrev_u32_e32 v8, vcc, 0, v1, vcc
1720; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
1721; CHECK-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
1722; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
1723; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
1724; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
1725; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
1726; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1727; CHECK-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
1728; CHECK-NEXT:    s_setpc_b64 s[30:31]
  ; 1235195 == 0x12d8fb, the literal that appears in the expected output above.
1729  %result = urem i64 %num, 1235195
1730  ret i64 %result
1731}
1732
1733define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
1734; GISEL-LABEL: v_urem_v2i64_oddk_denom:
1735; GISEL:       ; %bb.0:
1736; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1737; GISEL-NEXT:    s_mov_b32 s10, 0x12d8fb
1738; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s10
1739; GISEL-NEXT:    s_sub_u32 s8, 0, s10
1740; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
1741; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
1742; GISEL-NEXT:    v_mov_b32_e32 v6, v4
1743; GISEL-NEXT:    s_and_b32 s4, s4, 1
1744; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
1745; GISEL-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
1746; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
1747; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v6
1748; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
1749; GISEL-NEXT:    s_subb_u32 s9, 0, 0
1750; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
1751; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
1752; GISEL-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
1753; GISEL-NEXT:    s_sub_u32 s11, 0, s10
1754; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
1755; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
1756; GISEL-NEXT:    v_trunc_f32_e32 v6, v6
1757; GISEL-NEXT:    s_and_b32 s4, s4, 1
1758; GISEL-NEXT:    v_trunc_f32_e32 v7, v7
1759; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
1760; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
1761; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
1762; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
1763; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
1764; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
1765; GISEL-NEXT:    s_subb_u32 s6, 0, 0
1766; GISEL-NEXT:    v_mul_lo_u32 v8, s11, v6
1767; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
1768; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v7
1769; GISEL-NEXT:    v_mul_lo_u32 v10, s11, v4
1770; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v4
1771; GISEL-NEXT:    v_mul_hi_u32 v12, s11, v4
1772; GISEL-NEXT:    v_mul_lo_u32 v13, s8, v5
1773; GISEL-NEXT:    v_mul_lo_u32 v14, s9, v5
1774; GISEL-NEXT:    v_mul_hi_u32 v15, s8, v5
1775; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
1776; GISEL-NEXT:    v_mul_lo_u32 v11, v6, v10
1777; GISEL-NEXT:    v_mul_hi_u32 v16, v4, v10
1778; GISEL-NEXT:    v_mul_hi_u32 v10, v6, v10
1779; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
1780; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v13
1781; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v13
1782; GISEL-NEXT:    v_mul_hi_u32 v13, v7, v13
1783; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
1784; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
1785; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
1786; GISEL-NEXT:    v_mul_lo_u32 v15, v6, v8
1787; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v8
1788; GISEL-NEXT:    v_mul_hi_u32 v8, v6, v8
1789; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v9
1790; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
1791; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
1792; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
1793; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v9
1794; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v9
1795; GISEL-NEXT:    v_mul_hi_u32 v9, v7, v9
1796; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
1797; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
1798; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
1799; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
1800; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
1801; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
1802; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
1803; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
1804; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
1805; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
1806; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
1807; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
1808; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
1809; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1810; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
1811; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
1812; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
1813; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
1814; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1815; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
1816; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1817; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1818; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
1819; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
1820; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
1821; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
1822; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
1823; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
1824; GISEL-NEXT:    v_mul_lo_u32 v8, s11, v4
1825; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v4
1826; GISEL-NEXT:    v_mul_hi_u32 v12, s11, v4
1827; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
1828; GISEL-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
1829; GISEL-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v9
1830; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v5
1831; GISEL-NEXT:    v_mul_lo_u32 v14, s9, v5
1832; GISEL-NEXT:    v_mul_hi_u32 v15, s8, v5
1833; GISEL-NEXT:    v_mul_lo_u32 v16, s11, v10
1834; GISEL-NEXT:    v_mul_lo_u32 v17, v10, v8
1835; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v8
1836; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v8
1837; GISEL-NEXT:    v_mul_lo_u32 v19, s8, v13
1838; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
1839; GISEL-NEXT:    v_mul_lo_u32 v16, v13, v9
1840; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], v14, v19
1841; GISEL-NEXT:    v_mul_hi_u32 v19, v5, v9
1842; GISEL-NEXT:    v_mul_hi_u32 v9, v13, v9
1843; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
1844; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v14, v15
1845; GISEL-NEXT:    v_mul_lo_u32 v14, v4, v11
1846; GISEL-NEXT:    v_mul_lo_u32 v15, v5, v12
1847; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v15
1848; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
1849; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v19
1850; GISEL-NEXT:    v_mul_lo_u32 v15, v10, v11
1851; GISEL-NEXT:    v_mul_hi_u32 v19, v4, v11
1852; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v11
1853; GISEL-NEXT:    v_mul_lo_u32 v11, v13, v12
1854; GISEL-NEXT:    v_mul_hi_u32 v13, v13, v12
1855; GISEL-NEXT:    v_mul_hi_u32 v12, v5, v12
1856; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v17, v14
1857; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
1858; GISEL-NEXT:    v_add_i32_e64 v8, s[8:9], v15, v8
1859; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
1860; GISEL-NEXT:    v_add_i32_e64 v9, s[8:9], v11, v9
1861; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[8:9]
1862; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v18
1863; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[8:9]
1864; GISEL-NEXT:    v_add_i32_e64 v8, s[8:9], v8, v19
1865; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
1866; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
1867; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
1868; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
1869; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], v17, v14
1870; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v18
1871; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v19
1872; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
1873; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v14
1874; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
1875; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v16
1876; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[6:7]
1877; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v12
1878; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v14
1879; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
1880; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v13, v11
1881; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
1882; GISEL-NEXT:    v_addc_u32_e64 v7, vcc, v7, v11, s[4:5]
1883; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
1884; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
1885; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
1886; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
1887; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
1888; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
1889; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
1890; GISEL-NEXT:    v_mul_lo_u32 v9, v1, v5
1891; GISEL-NEXT:    v_mul_hi_u32 v11, v0, v5
1892; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
1893; GISEL-NEXT:    v_mul_lo_u32 v12, v2, v6
1894; GISEL-NEXT:    v_mul_lo_u32 v13, v3, v6
1895; GISEL-NEXT:    v_mul_hi_u32 v14, v2, v6
1896; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v6
1897; GISEL-NEXT:    v_mul_lo_u32 v15, v0, v7
1898; GISEL-NEXT:    v_mul_lo_u32 v16, v1, v7
1899; GISEL-NEXT:    v_mul_hi_u32 v17, v0, v7
1900; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
1901; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
1902; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
1903; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
1904; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
1905; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
1906; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
1907; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
1908; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
1909; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
1910; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1911; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
1912; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
1913; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
1914; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1915; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
1916; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
1917; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
1918; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
1919; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
1920; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
1921; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
1922; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
1923; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
1924; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
1925; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
1926; GISEL-NEXT:    v_mul_lo_u32 v10, s10, v4
1927; GISEL-NEXT:    v_mul_lo_u32 v12, 0, v4
1928; GISEL-NEXT:    v_mul_hi_u32 v4, s10, v4
1929; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
1930; GISEL-NEXT:    v_mul_lo_u32 v11, s10, v5
1931; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v5
1932; GISEL-NEXT:    v_mul_hi_u32 v5, s10, v5
1933; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
1934; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
1935; GISEL-NEXT:    v_mul_lo_u32 v6, s10, v6
1936; GISEL-NEXT:    v_mul_lo_u32 v7, s10, v7
1937; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
1938; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
1939; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
1940; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
1941; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
1942; GISEL-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
1943; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
1944; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s10, v2
1945; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
1946; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
1947; GISEL-NEXT:    v_subb_u32_e64 v7, s[6:7], v1, v5, s[4:5]
1948; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v5
1949; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], s10, v0
1950; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[6:7]
1951; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], 0, v6
1952; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[6:7]
1953; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1954; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v7
1955; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
1956; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
1957; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v6
1958; GISEL-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
1959; GISEL-NEXT:    v_subrev_i32_e32 v8, vcc, s10, v2
1960; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1961; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s10, v8
1962; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
1963; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
1964; GISEL-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc
1965; GISEL-NEXT:    v_subrev_i32_e32 v9, vcc, s10, v0
1966; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1967; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s10, v9
1968; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
1969; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
1970; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
1971; GISEL-NEXT:    v_subrev_i32_e32 v13, vcc, s10, v8
1972; GISEL-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v3, vcc
1973; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
1974; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
1975; GISEL-NEXT:    v_subrev_i32_e32 v16, vcc, s10, v9
1976; GISEL-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v1, vcc
1977; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
1978; GISEL-NEXT:    v_cndmask_b32_e32 v10, v12, v10, vcc
1979; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
1980; GISEL-NEXT:    v_cndmask_b32_e32 v11, v15, v11, vcc
1981; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
1982; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v13, vcc
1983; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v11
1984; GISEL-NEXT:    v_cndmask_b32_e64 v9, v9, v16, s[4:5]
1985; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v14, vcc
1986; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
1987; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
1988; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
1989; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
1990; GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v9, s[4:5]
1991; GISEL-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
1992; GISEL-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
1993; GISEL-NEXT:    s_setpc_b64 s[30:31]
1994;
1995; CGP-LABEL: v_urem_v2i64_oddk_denom:
1996; CGP:       ; %bb.0:
1997; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1998; CGP-NEXT:    v_cvt_f32_u32_e32 v4, 0x12d8fb
1999; CGP-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
2000; CGP-NEXT:    s_mov_b32 s8, 0xffed2705
2001; CGP-NEXT:    s_mov_b32 s10, 0x12d8fb
2002; CGP-NEXT:    v_mov_b32_e32 v6, v4
2003; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
2004; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
2005; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
2006; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v6
2007; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
2008; CGP-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
2009; CGP-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
2010; CGP-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
2011; CGP-NEXT:    v_trunc_f32_e32 v6, v6
2012; CGP-NEXT:    v_trunc_f32_e32 v7, v7
2013; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
2014; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
2015; CGP-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
2016; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
2017; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
2018; CGP-NEXT:    v_mul_lo_u32 v8, s8, v6
2019; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
2020; CGP-NEXT:    v_mul_lo_u32 v9, s8, v7
2021; CGP-NEXT:    v_mul_lo_u32 v10, s8, v4
2022; CGP-NEXT:    v_mul_lo_u32 v11, -1, v4
2023; CGP-NEXT:    v_mul_hi_u32 v12, s8, v4
2024; CGP-NEXT:    v_mul_lo_u32 v13, s8, v5
2025; CGP-NEXT:    v_mul_lo_u32 v14, -1, v5
2026; CGP-NEXT:    v_mul_hi_u32 v15, s8, v5
2027; CGP-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
2028; CGP-NEXT:    v_mul_lo_u32 v11, v6, v10
2029; CGP-NEXT:    v_mul_hi_u32 v16, v4, v10
2030; CGP-NEXT:    v_mul_hi_u32 v10, v6, v10
2031; CGP-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
2032; CGP-NEXT:    v_mul_lo_u32 v14, v7, v13
2033; CGP-NEXT:    v_mul_hi_u32 v17, v5, v13
2034; CGP-NEXT:    v_mul_hi_u32 v13, v7, v13
2035; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
2036; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
2037; CGP-NEXT:    v_mul_lo_u32 v12, v4, v8
2038; CGP-NEXT:    v_mul_lo_u32 v15, v6, v8
2039; CGP-NEXT:    v_mul_hi_u32 v18, v4, v8
2040; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
2041; CGP-NEXT:    v_mul_lo_u32 v19, v5, v9
2042; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
2043; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
2044; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
2045; CGP-NEXT:    v_mul_lo_u32 v14, v7, v9
2046; CGP-NEXT:    v_mul_hi_u32 v17, v5, v9
2047; CGP-NEXT:    v_mul_hi_u32 v9, v7, v9
2048; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
2049; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
2050; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
2051; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
2052; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
2053; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
2054; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
2055; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
2056; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
2057; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
2058; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
2059; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
2060; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
2061; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
2062; CGP-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
2063; CGP-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
2064; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
2065; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
2066; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
2067; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
2068; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
2069; CGP-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
2070; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
2071; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
2072; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
2073; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
2074; CGP-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
2075; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
2076; CGP-NEXT:    v_mul_lo_u32 v8, s8, v4
2077; CGP-NEXT:    v_mul_lo_u32 v11, -1, v4
2078; CGP-NEXT:    v_mul_hi_u32 v12, s8, v4
2079; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
2080; CGP-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
2081; CGP-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v9
2082; CGP-NEXT:    v_mul_lo_u32 v9, s8, v5
2083; CGP-NEXT:    v_mul_lo_u32 v14, -1, v5
2084; CGP-NEXT:    v_mul_hi_u32 v15, s8, v5
2085; CGP-NEXT:    v_mul_lo_u32 v16, s8, v10
2086; CGP-NEXT:    v_mul_lo_u32 v17, v10, v8
2087; CGP-NEXT:    v_mul_hi_u32 v18, v4, v8
2088; CGP-NEXT:    v_mul_hi_u32 v8, v10, v8
2089; CGP-NEXT:    v_mul_lo_u32 v19, s8, v13
2090; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
2091; CGP-NEXT:    v_mul_lo_u32 v16, v13, v9
2092; CGP-NEXT:    v_add_i32_e64 v14, s[6:7], v14, v19
2093; CGP-NEXT:    v_mul_hi_u32 v19, v5, v9
2094; CGP-NEXT:    v_mul_hi_u32 v9, v13, v9
2095; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
2096; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v14, v15
2097; CGP-NEXT:    v_mul_lo_u32 v14, v4, v11
2098; CGP-NEXT:    v_mul_lo_u32 v15, v5, v12
2099; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v15
2100; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
2101; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v19
2102; CGP-NEXT:    v_mul_lo_u32 v15, v10, v11
2103; CGP-NEXT:    v_mul_hi_u32 v19, v4, v11
2104; CGP-NEXT:    v_mul_hi_u32 v10, v10, v11
2105; CGP-NEXT:    v_mul_lo_u32 v11, v13, v12
2106; CGP-NEXT:    v_mul_hi_u32 v13, v13, v12
2107; CGP-NEXT:    v_mul_hi_u32 v12, v5, v12
2108; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v17, v14
2109; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
2110; CGP-NEXT:    v_add_i32_e64 v8, s[8:9], v15, v8
2111; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
2112; CGP-NEXT:    v_add_i32_e64 v9, s[8:9], v11, v9
2113; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[8:9]
2114; CGP-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v18
2115; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[8:9]
2116; CGP-NEXT:    v_add_i32_e64 v8, s[8:9], v8, v19
2117; CGP-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
2118; CGP-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
2119; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
2120; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
2121; CGP-NEXT:    v_add_i32_e64 v14, s[6:7], v17, v14
2122; CGP-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v18
2123; CGP-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v19
2124; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
2125; CGP-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v14
2126; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
2127; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v16
2128; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[6:7]
2129; CGP-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v12
2130; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v14
2131; CGP-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
2132; CGP-NEXT:    v_add_i32_e64 v11, s[6:7], v13, v11
2133; CGP-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
2134; CGP-NEXT:    v_addc_u32_e64 v7, vcc, v7, v11, s[4:5]
2135; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
2136; CGP-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
2137; CGP-NEXT:    v_mul_lo_u32 v8, v3, v4
2138; CGP-NEXT:    v_mul_hi_u32 v10, v2, v4
2139; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
2140; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
2141; CGP-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
2142; CGP-NEXT:    v_mul_lo_u32 v9, v1, v5
2143; CGP-NEXT:    v_mul_hi_u32 v11, v0, v5
2144; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
2145; CGP-NEXT:    v_mul_lo_u32 v12, v2, v6
2146; CGP-NEXT:    v_mul_lo_u32 v13, v3, v6
2147; CGP-NEXT:    v_mul_hi_u32 v14, v2, v6
2148; CGP-NEXT:    v_mul_hi_u32 v6, v3, v6
2149; CGP-NEXT:    v_mul_lo_u32 v15, v0, v7
2150; CGP-NEXT:    v_mul_lo_u32 v16, v1, v7
2151; CGP-NEXT:    v_mul_hi_u32 v17, v0, v7
2152; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
2153; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
2154; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2155; CGP-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
2156; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2157; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
2158; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
2159; CGP-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
2160; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
2161; CGP-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
2162; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2163; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
2164; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2165; CGP-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
2166; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2167; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
2168; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
2169; CGP-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
2170; CGP-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
2171; CGP-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
2172; CGP-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
2173; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
2174; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2175; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
2176; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2177; CGP-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
2178; CGP-NEXT:    v_mul_lo_u32 v10, s10, v4
2179; CGP-NEXT:    v_mul_lo_u32 v12, 0, v4
2180; CGP-NEXT:    v_mul_hi_u32 v4, s10, v4
2181; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
2182; CGP-NEXT:    v_mul_lo_u32 v11, s10, v5
2183; CGP-NEXT:    v_mul_lo_u32 v13, 0, v5
2184; CGP-NEXT:    v_mul_hi_u32 v5, s10, v5
2185; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
2186; CGP-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
2187; CGP-NEXT:    v_mul_lo_u32 v6, s10, v6
2188; CGP-NEXT:    v_mul_lo_u32 v7, s10, v7
2189; CGP-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
2190; CGP-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
2191; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
2192; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
2193; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
2194; CGP-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
2195; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
2196; CGP-NEXT:    v_cmp_le_u32_e64 s[4:5], s10, v2
2197; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
2198; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
2199; CGP-NEXT:    v_subb_u32_e64 v7, s[6:7], v1, v5, s[4:5]
2200; CGP-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v5
2201; CGP-NEXT:    v_cmp_le_u32_e64 s[6:7], s10, v0
2202; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[6:7]
2203; CGP-NEXT:    v_cmp_le_u32_e64 s[6:7], 0, v6
2204; CGP-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[6:7]
2205; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
2206; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v7
2207; CGP-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
2208; CGP-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
2209; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v6
2210; CGP-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
2211; CGP-NEXT:    v_subrev_i32_e32 v8, vcc, s10, v2
2212; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
2213; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s10, v8
2214; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
2215; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
2216; CGP-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc
2217; CGP-NEXT:    v_subrev_i32_e32 v9, vcc, s10, v0
2218; CGP-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
2219; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s10, v9
2220; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
2221; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
2222; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
2223; CGP-NEXT:    v_subrev_i32_e32 v13, vcc, s10, v8
2224; CGP-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v3, vcc
2225; CGP-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
2226; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
2227; CGP-NEXT:    v_subrev_i32_e32 v16, vcc, s10, v9
2228; CGP-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v1, vcc
2229; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
2230; CGP-NEXT:    v_cndmask_b32_e32 v10, v12, v10, vcc
2231; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
2232; CGP-NEXT:    v_cndmask_b32_e32 v11, v15, v11, vcc
2233; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
2234; CGP-NEXT:    v_cndmask_b32_e32 v8, v8, v13, vcc
2235; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v11
2236; CGP-NEXT:    v_cndmask_b32_e64 v9, v9, v16, s[4:5]
2237; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v14, vcc
2238; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
2239; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
2240; CGP-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
2241; CGP-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
2242; CGP-NEXT:    v_cndmask_b32_e64 v0, v0, v9, s[4:5]
2243; CGP-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
2244; CGP-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
2245; CGP-NEXT:    s_setpc_b64 s[30:31]
2246  %result = urem <2 x i64> %num, <i64 1235195, i64 1235195>
2247  ret <2 x i64> %result
2248}
2249
; Scalar i64 urem whose divisor is the non-constant value (4096 << %y).
; Every assertion in this function uses the prefix shared by both RUN lines,
; i.e. the same output is expected whether the idiv expansion in
; AMDGPUCodeGenPrepare is enabled or disabled.
; Shape of the expected output: v1 | v5 (the high dwords) is tested against
; zero; %bb.1 holds the full 64-bit expansion and %bb.3 the 32-bit expansion,
; which writes 0 to the high result dword (v3).
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing by hand.
2250define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) {
2251; CHECK-LABEL: v_urem_i64_pow2_shl_denom:
2252; CHECK:       ; %bb.0:
2253; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2254; CHECK-NEXT:    s_movk_i32 s4, 0x1000
2255; CHECK-NEXT:    s_mov_b32 s5, 0
2256; CHECK-NEXT:    v_lshl_b64 v[4:5], s[4:5], v2
2257; CHECK-NEXT:    v_or_b32_e32 v3, v1, v5
2258; CHECK-NEXT:    v_mov_b32_e32 v2, 0
2259; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
2260; CHECK-NEXT:    ; implicit-def: $vgpr2_vgpr3
2261; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
2262; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
2263; CHECK-NEXT:    s_cbranch_execz BB7_2
2264; CHECK-NEXT:  ; %bb.1:
2265; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v4
2266; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v5
2267; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
2268; CHECK-NEXT:    v_subb_u32_e32 v7, vcc, 0, v5, vcc
2269; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
2270; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
2271; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
2272; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
2273; CHECK-NEXT:    v_trunc_f32_e32 v3, v3
2274; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
2275; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
2276; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
2277; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v3
2278; CHECK-NEXT:    v_mul_lo_u32 v9, v6, v2
2279; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v2
2280; CHECK-NEXT:    v_mul_hi_u32 v11, v6, v2
2281; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
2282; CHECK-NEXT:    v_mul_lo_u32 v10, v3, v9
2283; CHECK-NEXT:    v_mul_hi_u32 v12, v2, v9
2284; CHECK-NEXT:    v_mul_hi_u32 v9, v3, v9
2285; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
2286; CHECK-NEXT:    v_mul_lo_u32 v11, v2, v8
2287; CHECK-NEXT:    v_mul_lo_u32 v13, v3, v8
2288; CHECK-NEXT:    v_mul_hi_u32 v14, v2, v8
2289; CHECK-NEXT:    v_mul_hi_u32 v8, v3, v8
2290; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
2291; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
2292; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v13, v9
2293; CHECK-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2294; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
2295; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2296; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v14
2297; CHECK-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2298; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
2299; CHECK-NEXT:    v_add_i32_e32 v11, vcc, v13, v12
2300; CHECK-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
2301; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2302; CHECK-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
2303; CHECK-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
2304; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v9
2305; CHECK-NEXT:    v_addc_u32_e64 v9, s[4:5], v3, v8, vcc
2306; CHECK-NEXT:    v_add_i32_e64 v3, s[4:5], v3, v8
2307; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v2
2308; CHECK-NEXT:    v_mul_lo_u32 v7, v7, v2
2309; CHECK-NEXT:    v_mul_hi_u32 v10, v6, v2
2310; CHECK-NEXT:    v_mul_lo_u32 v6, v6, v9
2311; CHECK-NEXT:    v_mul_lo_u32 v11, v9, v8
2312; CHECK-NEXT:    v_mul_hi_u32 v12, v2, v8
2313; CHECK-NEXT:    v_mul_hi_u32 v8, v9, v8
2314; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
2315; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
2316; CHECK-NEXT:    v_mul_lo_u32 v7, v2, v6
2317; CHECK-NEXT:    v_mul_lo_u32 v10, v9, v6
2318; CHECK-NEXT:    v_mul_hi_u32 v13, v2, v6
2319; CHECK-NEXT:    v_mul_hi_u32 v6, v9, v6
2320; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
2321; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
2322; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v10, v8
2323; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
2324; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v12
2325; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
2326; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v13
2327; CHECK-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
2328; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v9, v7
2329; CHECK-NEXT:    v_add_i32_e64 v9, s[4:5], v10, v11
2330; CHECK-NEXT:    v_add_i32_e64 v7, s[4:5], v8, v7
2331; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s[4:5]
2332; CHECK-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
2333; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
2334; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, v3, v6, vcc
2335; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
2336; CHECK-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
2337; CHECK-NEXT:    v_mul_lo_u32 v6, v1, v2
2338; CHECK-NEXT:    v_mul_hi_u32 v7, v0, v2
2339; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
2340; CHECK-NEXT:    v_mul_lo_u32 v8, v0, v3
2341; CHECK-NEXT:    v_mul_lo_u32 v9, v1, v3
2342; CHECK-NEXT:    v_mul_hi_u32 v10, v0, v3
2343; CHECK-NEXT:    v_mul_hi_u32 v3, v1, v3
2344; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
2345; CHECK-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2346; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v9, v2
2347; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2348; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
2349; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2350; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
2351; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
2352; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
2353; CHECK-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
2354; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
2355; CHECK-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2356; CHECK-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
2357; CHECK-NEXT:    v_mul_lo_u32 v7, v4, v2
2358; CHECK-NEXT:    v_mul_lo_u32 v8, v5, v2
2359; CHECK-NEXT:    v_mul_hi_u32 v2, v4, v2
2360; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
2361; CHECK-NEXT:    v_mul_lo_u32 v3, v4, v3
2362; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v8, v3
2363; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
2364; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v0, v7
2365; CHECK-NEXT:    v_subb_u32_e64 v6, s[4:5], v1, v2, vcc
2366; CHECK-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v2
2367; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v3, v4
2368; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[4:5]
2369; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v5
2370; CHECK-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
2371; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
2372; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v5
2373; CHECK-NEXT:    v_cndmask_b32_e32 v2, v7, v2, vcc
2374; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v3, v4
2375; CHECK-NEXT:    v_subbrev_u32_e64 v8, s[4:5], 0, v1, vcc
2376; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v4
2377; CHECK-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[4:5]
2378; CHECK-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
2379; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v5
2380; CHECK-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
2381; CHECK-NEXT:    v_sub_i32_e32 v11, vcc, v7, v4
2382; CHECK-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
2383; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, v8, v5
2384; CHECK-NEXT:    v_cndmask_b32_e32 v5, v10, v9, vcc
2385; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
2386; CHECK-NEXT:    v_cndmask_b32_e32 v5, v7, v11, vcc
2387; CHECK-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
2388; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
2389; CHECK-NEXT:    v_cndmask_b32_e32 v2, v3, v5, vcc
2390; CHECK-NEXT:    v_cndmask_b32_e32 v3, v6, v1, vcc
2391; CHECK-NEXT:  BB7_2: ; %Flow
2392; CHECK-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
2393; CHECK-NEXT:    s_xor_b64 exec, exec, s[4:5]
2394; CHECK-NEXT:    s_cbranch_execz BB7_4
2395; CHECK-NEXT:  ; %bb.3:
2396; CHECK-NEXT:    v_cvt_f32_u32_e32 v1, v4
2397; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, 0, v4
2398; CHECK-NEXT:    v_rcp_iflag_f32_e32 v1, v1
2399; CHECK-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
2400; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
2401; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v1
2402; CHECK-NEXT:    v_mul_hi_u32 v2, v1, v2
2403; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
2404; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
2405; CHECK-NEXT:    v_mul_lo_u32 v1, v1, v4
2406; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
2407; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v0, v4
2408; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
2409; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2410; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v0, v4
2411; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
2412; CHECK-NEXT:    v_cndmask_b32_e32 v2, v0, v1, vcc
2413; CHECK-NEXT:    v_mov_b32_e32 v3, 0
2414; CHECK-NEXT:  BB7_4:
2415; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
2416; CHECK-NEXT:    v_mov_b32_e32 v0, v2
2417; CHECK-NEXT:    v_mov_b32_e32 v1, v3
2418; CHECK-NEXT:    s_setpc_b64 s[30:31]
; Divisor: the constant 4096 shifted left by the runtime amount %y.
2419  %shl.y = shl i64 4096, %y
; Unsigned remainder of %x by that shifted divisor; this is the value returned.
2420  %r = urem i64 %x, %shl.y
2421  ret i64 %r
2422}
2423
2424define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
2425; GISEL-LABEL: v_urem_v2i64_pow2_shl_denom:
2426; GISEL:       ; %bb.0:
2427; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2428; GISEL-NEXT:    s_movk_i32 s4, 0x1000
2429; GISEL-NEXT:    s_mov_b32 s5, 0
2430; GISEL-NEXT:    v_lshl_b64 v[4:5], s[4:5], v4
2431; GISEL-NEXT:    v_lshl_b64 v[6:7], s[4:5], v6
2432; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v4
2433; GISEL-NEXT:    v_cvt_f32_u32_e32 v9, v5
2434; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v9
2435; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
2436; GISEL-NEXT:    v_mul_f32_e32 v8, 0x5f7ffffc, v8
2437; GISEL-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v8
2438; GISEL-NEXT:    v_trunc_f32_e32 v9, v9
2439; GISEL-NEXT:    v_mac_f32_e32 v8, 0xcf800000, v9
2440; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
2441; GISEL-NEXT:    v_cvt_u32_f32_e32 v9, v9
2442; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v4
2443; GISEL-NEXT:    v_subb_u32_e32 v11, vcc, 0, v5, vcc
2444; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
2445; GISEL-NEXT:    v_mul_lo_u32 v13, v11, v8
2446; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v9
2447; GISEL-NEXT:    v_mul_hi_u32 v15, v10, v8
2448; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
2449; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
2450; GISEL-NEXT:    v_mul_lo_u32 v14, v9, v12
2451; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v13
2452; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v12
2453; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
2454; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
2455; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
2456; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2457; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
2458; GISEL-NEXT:    v_mul_lo_u32 v15, v9, v13
2459; GISEL-NEXT:    v_mul_hi_u32 v12, v9, v12
2460; GISEL-NEXT:    v_mul_hi_u32 v16, v8, v13
2461; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
2462; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
2463; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v16
2464; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
2465; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
2466; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
2467; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2468; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
2469; GISEL-NEXT:    v_mul_hi_u32 v13, v9, v13
2470; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
2471; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
2472; GISEL-NEXT:    v_addc_u32_e64 v12, s[4:5], v9, v13, vcc
2473; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
2474; GISEL-NEXT:    v_mul_lo_u32 v13, v10, v8
2475; GISEL-NEXT:    v_mul_lo_u32 v11, v11, v8
2476; GISEL-NEXT:    v_mul_lo_u32 v14, v10, v12
2477; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v8
2478; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
2479; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
2480; GISEL-NEXT:    v_mul_lo_u32 v11, v12, v13
2481; GISEL-NEXT:    v_mul_lo_u32 v14, v8, v10
2482; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v13
2483; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v14
2484; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
2485; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v15
2486; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
2487; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v14, v11
2488; GISEL-NEXT:    v_mul_lo_u32 v14, v12, v10
2489; GISEL-NEXT:    v_mul_hi_u32 v13, v12, v13
2490; GISEL-NEXT:    v_mul_hi_u32 v15, v8, v10
2491; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
2492; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
2493; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v13, v15
2494; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
2495; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v15
2496; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v13, v11
2497; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
2498; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
2499; GISEL-NEXT:    v_mul_hi_u32 v10, v12, v10
2500; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v13
2501; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
2502; GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v9, v10, vcc
2503; GISEL-NEXT:    v_addc_u32_e64 v9, vcc, 0, v9, s[4:5]
2504; GISEL-NEXT:    v_mul_lo_u32 v10, v1, v8
2505; GISEL-NEXT:    v_mul_lo_u32 v11, v0, v9
2506; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v8
2507; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
2508; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
2509; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
2510; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2511; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
2512; GISEL-NEXT:    v_mul_lo_u32 v11, v1, v9
2513; GISEL-NEXT:    v_mul_hi_u32 v8, v1, v8
2514; GISEL-NEXT:    v_mul_hi_u32 v12, v0, v9
2515; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
2516; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
2517; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
2518; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2519; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
2520; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
2521; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2522; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
2523; GISEL-NEXT:    v_mul_hi_u32 v9, v1, v9
2524; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
2525; GISEL-NEXT:    v_mul_lo_u32 v10, v4, v8
2526; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v8
2527; GISEL-NEXT:    v_mul_lo_u32 v9, v4, v9
2528; GISEL-NEXT:    v_mul_hi_u32 v8, v4, v8
2529; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
2530; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
2531; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
2532; GISEL-NEXT:    v_subb_u32_e64 v9, s[4:5], v1, v8, vcc
2533; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], v1, v8
2534; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v9, v5
2535; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
2536; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v4
2537; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[4:5]
2538; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v9, v5
2539; GISEL-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s[4:5]
2540; GISEL-NEXT:    v_sub_i32_e64 v10, s[4:5], v0, v4
2541; GISEL-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
2542; GISEL-NEXT:    v_subbrev_u32_e64 v11, vcc, 0, v1, s[4:5]
2543; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v5
2544; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
2545; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v4
2546; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
2547; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v11, v5
2548; GISEL-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
2549; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v10, v4
2550; GISEL-NEXT:    v_subb_u32_e64 v1, s[4:5], v1, v5, s[4:5]
2551; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
2552; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
2553; GISEL-NEXT:    v_cndmask_b32_e32 v4, v10, v4, vcc
2554; GISEL-NEXT:    v_cndmask_b32_e32 v1, v11, v1, vcc
2555; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
2556; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
2557; GISEL-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
2558; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v6
2559; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v7
2560; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
2561; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
2562; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
2563; GISEL-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
2564; GISEL-NEXT:    v_trunc_f32_e32 v5, v5
2565; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
2566; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
2567; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
2568; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, 0, v6
2569; GISEL-NEXT:    v_subb_u32_e32 v9, vcc, 0, v7, vcc
2570; GISEL-NEXT:    v_mul_lo_u32 v10, v8, v4
2571; GISEL-NEXT:    v_mul_lo_u32 v11, v9, v4
2572; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v5
2573; GISEL-NEXT:    v_mul_hi_u32 v13, v8, v4
2574; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
2575; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
2576; GISEL-NEXT:    v_mul_lo_u32 v12, v5, v10
2577; GISEL-NEXT:    v_mul_lo_u32 v13, v4, v11
2578; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v10
2579; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
2580; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2581; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
2582; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2583; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
2584; GISEL-NEXT:    v_mul_lo_u32 v13, v5, v11
2585; GISEL-NEXT:    v_mul_hi_u32 v10, v5, v10
2586; GISEL-NEXT:    v_mul_hi_u32 v14, v4, v11
2587; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
2588; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2589; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v14
2590; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2591; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
2592; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
2593; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2594; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
2595; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v11
2596; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
2597; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
2598; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v5, v11, vcc
2599; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v11
2600; GISEL-NEXT:    v_mul_lo_u32 v11, v8, v4
2601; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v4
2602; GISEL-NEXT:    v_mul_lo_u32 v12, v8, v10
2603; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v4
2604; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
2605; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v9, v8
2606; GISEL-NEXT:    v_mul_lo_u32 v9, v10, v11
2607; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
2608; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v11
2609; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v12
2610; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
2611; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v9, v13
2612; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s[4:5]
2613; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v12, v9
2614; GISEL-NEXT:    v_mul_lo_u32 v12, v10, v8
2615; GISEL-NEXT:    v_mul_hi_u32 v11, v10, v11
2616; GISEL-NEXT:    v_mul_hi_u32 v13, v4, v8
2617; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
2618; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
2619; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v13
2620; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
2621; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v13
2622; GISEL-NEXT:    v_add_i32_e64 v9, s[4:5], v11, v9
2623; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
2624; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v11
2625; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v8
2626; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v11
2627; GISEL-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v9
2628; GISEL-NEXT:    v_addc_u32_e32 v5, vcc, v5, v8, vcc
2629; GISEL-NEXT:    v_addc_u32_e64 v5, vcc, 0, v5, s[4:5]
2630; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
2631; GISEL-NEXT:    v_mul_lo_u32 v9, v2, v5
2632; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
2633; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
2634; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2635; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
2636; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2637; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
2638; GISEL-NEXT:    v_mul_lo_u32 v9, v3, v5
2639; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
2640; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v5
2641; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
2642; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
2643; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
2644; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2645; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
2646; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
2647; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
2648; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
2649; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v5
2650; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
2651; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v4
2652; GISEL-NEXT:    v_mul_lo_u32 v9, v7, v4
2653; GISEL-NEXT:    v_mul_lo_u32 v5, v6, v5
2654; GISEL-NEXT:    v_mul_hi_u32 v4, v6, v4
2655; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
2656; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
2657; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v8
2658; GISEL-NEXT:    v_subb_u32_e64 v5, s[4:5], v3, v4, vcc
2659; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
2660; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v7
2661; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
2662; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v2, v6
2663; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[4:5]
2664; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], v5, v7
2665; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v8, s[4:5]
2666; GISEL-NEXT:    v_sub_i32_e64 v8, s[4:5], v2, v6
2667; GISEL-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
2668; GISEL-NEXT:    v_subbrev_u32_e64 v9, vcc, 0, v3, s[4:5]
2669; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v7
2670; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
2671; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v6
2672; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
2673; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, v9, v7
2674; GISEL-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
2675; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v8, v6
2676; GISEL-NEXT:    v_subb_u32_e64 v3, s[4:5], v3, v7, s[4:5]
2677; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
2678; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
2679; GISEL-NEXT:    v_cndmask_b32_e32 v6, v8, v6, vcc
2680; GISEL-NEXT:    v_cndmask_b32_e32 v3, v9, v3, vcc
2681; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
2682; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
2683; GISEL-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
2684; GISEL-NEXT:    s_setpc_b64 s[30:31]
2685;
2686; CGP-LABEL: v_urem_v2i64_pow2_shl_denom:
2687; CGP:       ; %bb.0:
2688; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2689; CGP-NEXT:    v_mov_b32_e32 v5, v0
2690; CGP-NEXT:    v_mov_b32_e32 v7, v1
2691; CGP-NEXT:    s_movk_i32 s4, 0x1000
2692; CGP-NEXT:    s_mov_b32 s5, 0
2693; CGP-NEXT:    v_lshl_b64 v[10:11], s[4:5], v4
2694; CGP-NEXT:    v_lshl_b64 v[8:9], s[4:5], v6
2695; CGP-NEXT:    v_or_b32_e32 v1, v7, v11
2696; CGP-NEXT:    v_mov_b32_e32 v0, 0
2697; CGP-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
2698; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
2699; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
2700; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
2701; CGP-NEXT:    s_cbranch_execz BB8_2
2702; CGP-NEXT:  ; %bb.1:
2703; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v10
2704; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v11
2705; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v10
2706; CGP-NEXT:    v_subb_u32_e32 v6, vcc, 0, v11, vcc
2707; CGP-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
2708; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
2709; CGP-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
2710; CGP-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
2711; CGP-NEXT:    v_trunc_f32_e32 v1, v1
2712; CGP-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
2713; CGP-NEXT:    v_cvt_u32_f32_e32 v1, v1
2714; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
2715; CGP-NEXT:    v_mul_lo_u32 v12, v4, v1
2716; CGP-NEXT:    v_mul_lo_u32 v13, v4, v0
2717; CGP-NEXT:    v_mul_lo_u32 v14, v6, v0
2718; CGP-NEXT:    v_mul_hi_u32 v15, v4, v0
2719; CGP-NEXT:    v_add_i32_e32 v12, vcc, v14, v12
2720; CGP-NEXT:    v_mul_lo_u32 v14, v1, v13
2721; CGP-NEXT:    v_mul_hi_u32 v16, v0, v13
2722; CGP-NEXT:    v_mul_hi_u32 v13, v1, v13
2723; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
2724; CGP-NEXT:    v_mul_lo_u32 v15, v0, v12
2725; CGP-NEXT:    v_mul_lo_u32 v17, v1, v12
2726; CGP-NEXT:    v_mul_hi_u32 v18, v0, v12
2727; CGP-NEXT:    v_mul_hi_u32 v12, v1, v12
2728; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v15
2729; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
2730; CGP-NEXT:    v_add_i32_e32 v13, vcc, v17, v13
2731; CGP-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
2732; CGP-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
2733; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2734; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v18
2735; CGP-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
2736; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
2737; CGP-NEXT:    v_add_i32_e32 v15, vcc, v17, v16
2738; CGP-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
2739; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2740; CGP-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
2741; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
2742; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v13
2743; CGP-NEXT:    v_addc_u32_e64 v13, s[4:5], v1, v12, vcc
2744; CGP-NEXT:    v_add_i32_e64 v1, s[4:5], v1, v12
2745; CGP-NEXT:    v_mul_lo_u32 v12, v4, v0
2746; CGP-NEXT:    v_mul_lo_u32 v6, v6, v0
2747; CGP-NEXT:    v_mul_hi_u32 v14, v4, v0
2748; CGP-NEXT:    v_mul_lo_u32 v4, v4, v13
2749; CGP-NEXT:    v_mul_lo_u32 v15, v13, v12
2750; CGP-NEXT:    v_mul_hi_u32 v16, v0, v12
2751; CGP-NEXT:    v_mul_hi_u32 v12, v13, v12
2752; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v6, v4
2753; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v14
2754; CGP-NEXT:    v_mul_lo_u32 v6, v0, v4
2755; CGP-NEXT:    v_mul_lo_u32 v14, v13, v4
2756; CGP-NEXT:    v_mul_hi_u32 v17, v0, v4
2757; CGP-NEXT:    v_mul_hi_u32 v4, v13, v4
2758; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v15, v6
2759; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
2760; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v14, v12
2761; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
2762; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v16
2763; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s[4:5]
2764; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v17
2765; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
2766; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v13, v6
2767; CGP-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v15
2768; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v12, v6
2769; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
2770; CGP-NEXT:    v_add_i32_e64 v12, s[4:5], v13, v12
2771; CGP-NEXT:    v_add_i32_e64 v4, s[4:5], v4, v12
2772; CGP-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
2773; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
2774; CGP-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2775; CGP-NEXT:    v_mul_lo_u32 v4, v7, v0
2776; CGP-NEXT:    v_mul_hi_u32 v6, v5, v0
2777; CGP-NEXT:    v_mul_hi_u32 v0, v7, v0
2778; CGP-NEXT:    v_mul_lo_u32 v12, v5, v1
2779; CGP-NEXT:    v_mul_lo_u32 v13, v7, v1
2780; CGP-NEXT:    v_mul_hi_u32 v14, v5, v1
2781; CGP-NEXT:    v_mul_hi_u32 v1, v7, v1
2782; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
2783; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2784; CGP-NEXT:    v_add_i32_e32 v0, vcc, v13, v0
2785; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2786; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
2787; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2788; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v14
2789; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2790; CGP-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
2791; CGP-NEXT:    v_add_i32_e32 v6, vcc, v13, v6
2792; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
2793; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
2794; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
2795; CGP-NEXT:    v_mul_lo_u32 v6, v10, v0
2796; CGP-NEXT:    v_mul_lo_u32 v12, v11, v0
2797; CGP-NEXT:    v_mul_hi_u32 v0, v10, v0
2798; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
2799; CGP-NEXT:    v_mul_lo_u32 v1, v10, v1
2800; CGP-NEXT:    v_add_i32_e32 v1, vcc, v12, v1
2801; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
2802; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v5, v6
2803; CGP-NEXT:    v_subb_u32_e64 v4, s[4:5], v7, v0, vcc
2804; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v7, v0
2805; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v10
2806; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[4:5]
2807; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v4, v11
2808; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
2809; CGP-NEXT:    v_subb_u32_e32 v0, vcc, v0, v11, vcc
2810; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v11
2811; CGP-NEXT:    v_cndmask_b32_e32 v6, v7, v6, vcc
2812; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v10
2813; CGP-NEXT:    v_subbrev_u32_e64 v12, s[4:5], 0, v0, vcc
2814; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v10
2815; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[4:5]
2816; CGP-NEXT:    v_subb_u32_e32 v0, vcc, v0, v11, vcc
2817; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v12, v11
2818; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, -1, vcc
2819; CGP-NEXT:    v_sub_i32_e32 v15, vcc, v7, v10
2820; CGP-NEXT:    v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
2821; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v12, v11
2822; CGP-NEXT:    v_cndmask_b32_e32 v11, v14, v13, vcc
2823; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
2824; CGP-NEXT:    v_cndmask_b32_e32 v7, v7, v15, vcc
2825; CGP-NEXT:    v_cndmask_b32_e32 v11, v12, v0, vcc
2826; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
2827; CGP-NEXT:    v_cndmask_b32_e32 v0, v1, v7, vcc
2828; CGP-NEXT:    v_cndmask_b32_e32 v1, v4, v11, vcc
2829; CGP-NEXT:  BB8_2: ; %Flow2
2830; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
2831; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]
2832; CGP-NEXT:    s_cbranch_execz BB8_4
2833; CGP-NEXT:  ; %bb.3:
2834; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v10
2835; CGP-NEXT:    v_sub_i32_e32 v1, vcc, 0, v10
2836; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
2837; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
2838; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
2839; CGP-NEXT:    v_mul_lo_u32 v1, v1, v0
2840; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
2841; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
2842; CGP-NEXT:    v_mul_hi_u32 v0, v5, v0
2843; CGP-NEXT:    v_mul_lo_u32 v0, v0, v10
2844; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v5, v0
2845; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v0, v10
2846; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v10
2847; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2848; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v0, v10
2849; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v10
2850; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2851; CGP-NEXT:    v_mov_b32_e32 v1, 0
2852; CGP-NEXT:  BB8_4:
2853; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
2854; CGP-NEXT:    v_or_b32_e32 v5, v3, v9
2855; CGP-NEXT:    v_mov_b32_e32 v4, 0
2856; CGP-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
2857; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
2858; CGP-NEXT:    s_and_saveexec_b64 s[4:5], vcc
2859; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[4:5]
2860; CGP-NEXT:    s_cbranch_execz BB8_6
2861; CGP-NEXT:  ; %bb.5:
2862; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v8
2863; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v9
2864; CGP-NEXT:    v_sub_i32_e32 v6, vcc, 0, v8
2865; CGP-NEXT:    v_subb_u32_e32 v7, vcc, 0, v9, vcc
2866; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
2867; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
2868; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
2869; CGP-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
2870; CGP-NEXT:    v_trunc_f32_e32 v5, v5
2871; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
2872; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
2873; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
2874; CGP-NEXT:    v_mul_lo_u32 v10, v6, v5
2875; CGP-NEXT:    v_mul_lo_u32 v11, v6, v4
2876; CGP-NEXT:    v_mul_lo_u32 v12, v7, v4
2877; CGP-NEXT:    v_mul_hi_u32 v13, v6, v4
2878; CGP-NEXT:    v_add_i32_e32 v10, vcc, v12, v10
2879; CGP-NEXT:    v_mul_lo_u32 v12, v5, v11
2880; CGP-NEXT:    v_mul_hi_u32 v14, v4, v11
2881; CGP-NEXT:    v_mul_hi_u32 v11, v5, v11
2882; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
2883; CGP-NEXT:    v_mul_lo_u32 v13, v4, v10
2884; CGP-NEXT:    v_mul_lo_u32 v15, v5, v10
2885; CGP-NEXT:    v_mul_hi_u32 v16, v4, v10
2886; CGP-NEXT:    v_mul_hi_u32 v10, v5, v10
2887; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v13
2888; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
2889; CGP-NEXT:    v_add_i32_e32 v11, vcc, v15, v11
2890; CGP-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
2891; CGP-NEXT:    v_add_i32_e32 v12, vcc, v12, v14
2892; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2893; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v16
2894; CGP-NEXT:    v_cndmask_b32_e64 v14, 0, 1, vcc
2895; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
2896; CGP-NEXT:    v_add_i32_e32 v13, vcc, v15, v14
2897; CGP-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
2898; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
2899; CGP-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
2900; CGP-NEXT:    v_add_i32_e32 v10, vcc, v10, v12
2901; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
2902; CGP-NEXT:    v_addc_u32_e64 v11, s[4:5], v5, v10, vcc
2903; CGP-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v10
2904; CGP-NEXT:    v_mul_lo_u32 v10, v6, v4
2905; CGP-NEXT:    v_mul_lo_u32 v7, v7, v4
2906; CGP-NEXT:    v_mul_hi_u32 v12, v6, v4
2907; CGP-NEXT:    v_mul_lo_u32 v6, v6, v11
2908; CGP-NEXT:    v_mul_lo_u32 v13, v11, v10
2909; CGP-NEXT:    v_mul_hi_u32 v14, v4, v10
2910; CGP-NEXT:    v_mul_hi_u32 v10, v11, v10
2911; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v7, v6
2912; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v12
2913; CGP-NEXT:    v_mul_lo_u32 v7, v4, v6
2914; CGP-NEXT:    v_mul_lo_u32 v12, v11, v6
2915; CGP-NEXT:    v_mul_hi_u32 v15, v4, v6
2916; CGP-NEXT:    v_mul_hi_u32 v6, v11, v6
2917; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v13, v7
2918; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
2919; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v12, v10
2920; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
2921; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v7, v14
2922; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s[4:5]
2923; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v15
2924; CGP-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[4:5]
2925; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v11, v7
2926; CGP-NEXT:    v_add_i32_e64 v11, s[4:5], v12, v13
2927; CGP-NEXT:    v_add_i32_e64 v7, s[4:5], v10, v7
2928; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[4:5]
2929; CGP-NEXT:    v_add_i32_e64 v10, s[4:5], v11, v10
2930; CGP-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v10
2931; CGP-NEXT:    v_addc_u32_e32 v5, vcc, v5, v6, vcc
2932; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
2933; CGP-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
2934; CGP-NEXT:    v_mul_lo_u32 v6, v3, v4
2935; CGP-NEXT:    v_mul_hi_u32 v7, v2, v4
2936; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
2937; CGP-NEXT:    v_mul_lo_u32 v10, v2, v5
2938; CGP-NEXT:    v_mul_lo_u32 v11, v3, v5
2939; CGP-NEXT:    v_mul_hi_u32 v12, v2, v5
2940; CGP-NEXT:    v_mul_hi_u32 v5, v3, v5
2941; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v10
2942; CGP-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
2943; CGP-NEXT:    v_add_i32_e32 v4, vcc, v11, v4
2944; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
2945; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
2946; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2947; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
2948; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
2949; CGP-NEXT:    v_add_i32_e32 v6, vcc, v10, v6
2950; CGP-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
2951; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
2952; CGP-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
2953; CGP-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
2954; CGP-NEXT:    v_mul_lo_u32 v7, v8, v4
2955; CGP-NEXT:    v_mul_lo_u32 v10, v9, v4
2956; CGP-NEXT:    v_mul_hi_u32 v4, v8, v4
2957; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
2958; CGP-NEXT:    v_mul_lo_u32 v5, v8, v5
2959; CGP-NEXT:    v_add_i32_e32 v5, vcc, v10, v5
2960; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
2961; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v2, v7
2962; CGP-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
2963; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
2964; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v5, v8
2965; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
2966; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v6, v9
2967; CGP-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
2968; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v9, vcc
2969; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v6, v9
2970; CGP-NEXT:    v_cndmask_b32_e32 v4, v7, v4, vcc
2971; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v5, v8
2972; CGP-NEXT:    v_subbrev_u32_e64 v10, s[4:5], 0, v3, vcc
2973; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v8
2974; CGP-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[4:5]
2975; CGP-NEXT:    v_subb_u32_e32 v3, vcc, v3, v9, vcc
2976; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v9
2977; CGP-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
2978; CGP-NEXT:    v_sub_i32_e32 v13, vcc, v7, v8
2979; CGP-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
2980; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
2981; CGP-NEXT:    v_cndmask_b32_e32 v9, v12, v11, vcc
2982; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
2983; CGP-NEXT:    v_cndmask_b32_e32 v7, v7, v13, vcc
2984; CGP-NEXT:    v_cndmask_b32_e32 v3, v10, v3, vcc
2985; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
2986; CGP-NEXT:    v_cndmask_b32_e32 v4, v5, v7, vcc
2987; CGP-NEXT:    v_cndmask_b32_e32 v5, v6, v3, vcc
2988; CGP-NEXT:  BB8_6: ; %Flow
2989; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
2990; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]
2991; CGP-NEXT:    s_cbranch_execz BB8_8
2992; CGP-NEXT:  ; %bb.7:
2993; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v8
2994; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v8
2995; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
2996; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
2997; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
2998; CGP-NEXT:    v_mul_lo_u32 v4, v4, v3
2999; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
3000; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
3001; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
3002; CGP-NEXT:    v_mul_lo_u32 v3, v3, v8
3003; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
3004; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v2, v8
3005; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v8
3006; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
3007; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v2, v8
3008; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v8
3009; CGP-NEXT:    v_cndmask_b32_e32 v4, v2, v3, vcc
3010; CGP-NEXT:    v_mov_b32_e32 v5, 0
3011; CGP-NEXT:  BB8_8:
3012; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
3013; CGP-NEXT:    v_mov_b32_e32 v2, v4
3014; CGP-NEXT:    v_mov_b32_e32 v3, v5
3015; CGP-NEXT:    s_setpc_b64 s[30:31]
3016  %shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
3017  %r = urem <2 x i64> %x, %shl.y
3018  ret <2 x i64> %r
3019}
3020
3021define i64 @v_urem_i64_24bit(i64 %num, i64 %den) {
3022; GISEL-LABEL: v_urem_i64_24bit:
3023; GISEL:       ; %bb.0:
3024; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3025; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
3026; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
3027; GISEL-NEXT:    v_and_b32_e32 v1, s4, v2
3028; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, v1
3029; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
3030; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
3031; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
3032; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
3033; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v2
3034; GISEL-NEXT:    v_mul_hi_u32 v3, v2, v3
3035; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
3036; GISEL-NEXT:    v_mul_hi_u32 v2, v0, v2
3037; GISEL-NEXT:    v_mul_lo_u32 v2, v2, v1
3038; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
3039; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
3040; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
3041; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3042; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
3043; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
3044; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
3045; GISEL-NEXT:    v_mov_b32_e32 v1, 0
3046; GISEL-NEXT:    s_setpc_b64 s[30:31]
3047;
3048; CGP-LABEL: v_urem_i64_24bit:
3049; CGP:       ; %bb.0:
3050; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3051; CGP-NEXT:    s_mov_b32 s4, 0xffffff
3052; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
3053; CGP-NEXT:    v_and_b32_e32 v1, s4, v2
3054; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v0
3055; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v1
3056; CGP-NEXT:    v_rcp_f32_e32 v4, v3
3057; CGP-NEXT:    v_mul_f32_e32 v4, v2, v4
3058; CGP-NEXT:    v_trunc_f32_e32 v4, v4
3059; CGP-NEXT:    v_mad_f32 v2, -v4, v3, v2
3060; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
3061; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v2|, v3
3062; CGP-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[4:5]
3063; CGP-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
3064; CGP-NEXT:    v_mul_lo_u32 v1, v2, v1
3065; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
3066; CGP-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
3067; CGP-NEXT:    v_mov_b32_e32 v1, 0
3068; CGP-NEXT:    s_setpc_b64 s[30:31]
  ; Scalar i64 urem whose operands are both masked to their low 24 bits
  ; (16777215 == 0xffffff) before the remainder is taken. The two RUN
  ; configurations are checked with separate prefixes above because their
  ; expected instruction sequences differ; neither expected sequence contains
  ; 64-bit arithmetic, and both end by writing 0 to v1 (presumably the high
  ; half of the returned i64 -- confirm against the calling convention).
  ; The assertions above are autogenerated; regenerate them with
  ; utils/update_llc_test_checks.py rather than editing them by hand.
3069  %num.mask = and i64 %num, 16777215 ; keep only bits [23:0] of the numerator
3070  %den.mask = and i64 %den, 16777215 ; keep only bits [23:0] of the denominator
3071  %result = urem i64 %num.mask, %den.mask ; unsigned remainder of the masked values
3072  ret i64 %result
3073}
3074
3075define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
3076; GISEL-LABEL: v_urem_v2i64_24bit:
3077; GISEL:       ; %bb.0:
3078; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3079; GISEL-NEXT:    s_mov_b32 s6, 0xffffff
3080; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v1, 0
3081; GISEL-NEXT:    v_and_b32_e32 v3, s6, v4
3082; GISEL-NEXT:    v_and_b32_e32 v4, s6, v6
3083; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v3
3084; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, 0, v3
3085; GISEL-NEXT:    v_subb_u32_e64 v7, s[4:5], 0, 0, vcc
3086; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v4
3087; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v4
3088; GISEL-NEXT:    v_subb_u32_e64 v10, s[4:5], 0, 0, vcc
3089; GISEL-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v1
3090; GISEL-NEXT:    v_mac_f32_e32 v8, 0x4f800000, v1
3091; GISEL-NEXT:    v_rcp_iflag_f32_e32 v1, v5
3092; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v8
3093; GISEL-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v1
3094; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
3095; GISEL-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v1
3096; GISEL-NEXT:    v_mul_f32_e32 v11, 0x2f800000, v5
3097; GISEL-NEXT:    v_trunc_f32_e32 v8, v8
3098; GISEL-NEXT:    v_trunc_f32_e32 v11, v11
3099; GISEL-NEXT:    v_mac_f32_e32 v1, 0xcf800000, v8
3100; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
3101; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v11
3102; GISEL-NEXT:    v_cvt_u32_f32_e32 v11, v11
3103; GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v1
3104; GISEL-NEXT:    v_mul_lo_u32 v12, v6, v8
3105; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
3106; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v11
3107; GISEL-NEXT:    v_mul_lo_u32 v14, v6, v1
3108; GISEL-NEXT:    v_mul_lo_u32 v15, v7, v1
3109; GISEL-NEXT:    v_mul_hi_u32 v16, v6, v1
3110; GISEL-NEXT:    v_mul_lo_u32 v17, v9, v5
3111; GISEL-NEXT:    v_mul_lo_u32 v18, v10, v5
3112; GISEL-NEXT:    v_mul_hi_u32 v19, v9, v5
3113; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v12
3114; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v18, v13
3115; GISEL-NEXT:    v_mul_lo_u32 v15, v11, v17
3116; GISEL-NEXT:    v_mul_hi_u32 v18, v5, v17
3117; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v19
3118; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v13
3119; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v19
3120; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
3121; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v18
3122; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v14
3123; GISEL-NEXT:    v_mul_hi_u32 v18, v1, v14
3124; GISEL-NEXT:    v_mul_hi_u32 v14, v8, v14
3125; GISEL-NEXT:    v_mul_hi_u32 v17, v11, v17
3126; GISEL-NEXT:    v_add_i32_e64 v12, s[4:5], v12, v16
3127; GISEL-NEXT:    v_mul_lo_u32 v16, v1, v12
3128; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v16
3129; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
3130; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v18
3131; GISEL-NEXT:    v_mul_lo_u32 v15, v8, v12
3132; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
3133; GISEL-NEXT:    v_add_i32_e64 v16, s[4:5], v16, v18
3134; GISEL-NEXT:    v_mul_hi_u32 v18, v1, v12
3135; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v15, v14
3136; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
3137; GISEL-NEXT:    v_add_i32_e64 v14, s[4:5], v14, v18
3138; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[4:5]
3139; GISEL-NEXT:    v_add_i32_e64 v15, s[4:5], v15, v18
3140; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
3141; GISEL-NEXT:    v_add_i32_e32 v18, vcc, v19, v18
3142; GISEL-NEXT:    v_mul_lo_u32 v19, v11, v13
3143; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v19, v17
3144; GISEL-NEXT:    v_mul_hi_u32 v19, v5, v13
3145; GISEL-NEXT:    v_cndmask_b32_e64 v20, 0, 1, vcc
3146; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v19
3147; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
3148; GISEL-NEXT:    v_add_i32_e32 v19, vcc, v20, v19
3149; GISEL-NEXT:    v_and_b32_e32 v0, s6, v0
3150; GISEL-NEXT:    v_and_b32_e32 v2, s6, v2
3151; GISEL-NEXT:    v_mul_hi_u32 v12, v8, v12
3152; GISEL-NEXT:    v_mul_hi_u32 v13, v11, v13
3153; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v16
3154; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
3155; GISEL-NEXT:    v_add_i32_e32 v17, vcc, v17, v18
3156; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
3157; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
3158; GISEL-NEXT:    v_add_i32_e32 v16, vcc, v19, v18
3159; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v12, v15
3160; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v16
3161; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v14
3162; GISEL-NEXT:    v_addc_u32_e64 v14, s[4:5], v8, v12, vcc
3163; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], v8, v12
3164; GISEL-NEXT:    v_mul_lo_u32 v12, v6, v1
3165; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v1
3166; GISEL-NEXT:    v_mul_hi_u32 v15, v6, v1
3167; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v17
3168; GISEL-NEXT:    v_addc_u32_e64 v16, s[6:7], v11, v13, s[4:5]
3169; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v13
3170; GISEL-NEXT:    v_mul_lo_u32 v13, v9, v5
3171; GISEL-NEXT:    v_mul_lo_u32 v10, v10, v5
3172; GISEL-NEXT:    v_mul_hi_u32 v17, v9, v5
3173; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v14
3174; GISEL-NEXT:    v_mul_lo_u32 v18, v14, v12
3175; GISEL-NEXT:    v_mul_hi_u32 v19, v1, v12
3176; GISEL-NEXT:    v_mul_hi_u32 v12, v14, v12
3177; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v16
3178; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v7, v6
3179; GISEL-NEXT:    v_mul_lo_u32 v7, v16, v13
3180; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v10, v9
3181; GISEL-NEXT:    v_mul_hi_u32 v10, v5, v13
3182; GISEL-NEXT:    v_mul_hi_u32 v13, v16, v13
3183; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v6, v15
3184; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v17
3185; GISEL-NEXT:    v_mul_lo_u32 v15, v1, v6
3186; GISEL-NEXT:    v_mul_lo_u32 v17, v5, v9
3187; GISEL-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v17
3188; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[6:7]
3189; GISEL-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v10
3190; GISEL-NEXT:    v_mul_lo_u32 v7, v14, v6
3191; GISEL-NEXT:    v_mul_hi_u32 v10, v1, v6
3192; GISEL-NEXT:    v_mul_hi_u32 v6, v14, v6
3193; GISEL-NEXT:    v_mul_lo_u32 v14, v16, v9
3194; GISEL-NEXT:    v_mul_hi_u32 v16, v16, v9
3195; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v9
3196; GISEL-NEXT:    v_add_i32_e64 v15, s[8:9], v18, v15
3197; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
3198; GISEL-NEXT:    v_add_i32_e64 v7, s[8:9], v7, v12
3199; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[8:9]
3200; GISEL-NEXT:    v_add_i32_e64 v13, s[8:9], v14, v13
3201; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[8:9]
3202; GISEL-NEXT:    v_add_i32_e64 v15, s[8:9], v15, v19
3203; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
3204; GISEL-NEXT:    v_add_i32_e64 v7, s[8:9], v7, v10
3205; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s[8:9]
3206; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
3207; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v13, v9
3208; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s[6:7]
3209; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v18, v15
3210; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v12, v10
3211; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v17, v19
3212; GISEL-NEXT:    v_add_i32_e64 v13, s[6:7], v14, v13
3213; GISEL-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v15
3214; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[6:7]
3215; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
3216; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
3217; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v14
3218; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v13, v12
3219; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v6, v10
3220; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v16, v12
3221; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v8, v6, vcc
3222; GISEL-NEXT:    v_addc_u32_e64 v8, vcc, v11, v10, s[4:5]
3223; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v7
3224; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
3225; GISEL-NEXT:    v_mul_lo_u32 v7, 0, v1
3226; GISEL-NEXT:    v_mul_hi_u32 v10, v0, v1
3227; GISEL-NEXT:    v_mul_hi_u32 v1, 0, v1
3228; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
3229; GISEL-NEXT:    v_addc_u32_e32 v8, vcc, 0, v8, vcc
3230; GISEL-NEXT:    v_mul_lo_u32 v9, 0, v5
3231; GISEL-NEXT:    v_mul_hi_u32 v11, v2, v5
3232; GISEL-NEXT:    v_mul_hi_u32 v5, 0, v5
3233; GISEL-NEXT:    v_mul_lo_u32 v12, v0, v6
3234; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v6
3235; GISEL-NEXT:    v_mul_hi_u32 v14, v0, v6
3236; GISEL-NEXT:    v_mul_hi_u32 v6, 0, v6
3237; GISEL-NEXT:    v_mul_lo_u32 v15, v2, v8
3238; GISEL-NEXT:    v_mul_lo_u32 v16, 0, v8
3239; GISEL-NEXT:    v_mul_hi_u32 v17, v2, v8
3240; GISEL-NEXT:    v_mul_hi_u32 v8, 0, v8
3241; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v12
3242; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
3243; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v13, v1
3244; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
3245; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
3246; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
3247; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
3248; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
3249; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v10
3250; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
3251; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v14
3252; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
3253; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
3254; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
3255; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
3256; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
3257; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v12, v7
3258; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
3259; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
3260; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
3261; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v7
3262; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
3263; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
3264; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
3265; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
3266; GISEL-NEXT:    v_mul_lo_u32 v10, v3, v1
3267; GISEL-NEXT:    v_mul_lo_u32 v12, 0, v1
3268; GISEL-NEXT:    v_mul_hi_u32 v1, v3, v1
3269; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
3270; GISEL-NEXT:    v_mul_lo_u32 v11, v4, v5
3271; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v5
3272; GISEL-NEXT:    v_mul_hi_u32 v5, v4, v5
3273; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
3274; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
3275; GISEL-NEXT:    v_mul_lo_u32 v6, v3, v6
3276; GISEL-NEXT:    v_mul_lo_u32 v7, v4, v7
3277; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
3278; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
3279; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v6, v1
3280; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
3281; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v10
3282; GISEL-NEXT:    v_subb_u32_e64 v6, s[4:5], 0, v1, vcc
3283; GISEL-NEXT:    v_sub_i32_e64 v1, s[4:5], 0, v1
3284; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
3285; GISEL-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[4:5]
3286; GISEL-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v11
3287; GISEL-NEXT:    v_subb_u32_e64 v8, s[6:7], 0, v5, s[4:5]
3288; GISEL-NEXT:    v_sub_i32_e64 v5, s[6:7], 0, v5
3289; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v2, v4
3290; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[6:7]
3291; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], 0, v6
3292; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[6:7]
3293; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
3294; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v8
3295; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
3296; GISEL-NEXT:    v_subbrev_u32_e64 v5, vcc, 0, v5, s[4:5]
3297; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v6
3298; GISEL-NEXT:    v_cndmask_b32_e32 v7, v10, v7, vcc
3299; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, v0, v3
3300; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
3301; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v3
3302; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
3303; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v8
3304; GISEL-NEXT:    v_cndmask_b32_e32 v9, v11, v9, vcc
3305; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, v2, v4
3306; GISEL-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
3307; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v4
3308; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
3309; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
3310; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, -1, vcc
3311; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v10, v3
3312; GISEL-NEXT:    v_subbrev_u32_e32 v15, vcc, 0, v1, vcc
3313; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v5
3314; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, -1, vcc
3315; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, v11, v4
3316; GISEL-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v5, vcc
3317; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
3318; GISEL-NEXT:    v_cndmask_b32_e32 v12, v14, v12, vcc
3319; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
3320; GISEL-NEXT:    v_cndmask_b32_e32 v13, v16, v13, vcc
3321; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v12
3322; GISEL-NEXT:    v_cndmask_b32_e32 v3, v10, v3, vcc
3323; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v13
3324; GISEL-NEXT:    v_cndmask_b32_e64 v4, v11, v4, s[4:5]
3325; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
3326; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
3327; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
3328; GISEL-NEXT:    v_cndmask_b32_e64 v3, v5, v17, s[4:5]
3329; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v9
3330; GISEL-NEXT:    v_cndmask_b32_e64 v2, v2, v4, s[4:5]
3331; GISEL-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
3332; GISEL-NEXT:    v_cndmask_b32_e64 v3, v8, v3, s[4:5]
3333; GISEL-NEXT:    s_setpc_b64 s[30:31]
3334;
3335; CGP-LABEL: v_urem_v2i64_24bit:
3336; CGP:       ; %bb.0:
3337; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3338; CGP-NEXT:    s_mov_b32 s6, 0xffffff
3339; CGP-NEXT:    v_and_b32_e32 v0, s6, v0
3340; CGP-NEXT:    v_and_b32_e32 v1, s6, v2
3341; CGP-NEXT:    v_and_b32_e32 v2, s6, v4
3342; CGP-NEXT:    v_and_b32_e32 v3, s6, v6
3343; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v0
3344; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v2
3345; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v1
3346; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
3347; CGP-NEXT:    v_rcp_f32_e32 v8, v5
3348; CGP-NEXT:    v_rcp_f32_e32 v9, v7
3349; CGP-NEXT:    v_mul_f32_e32 v8, v4, v8
3350; CGP-NEXT:    v_mul_f32_e32 v9, v6, v9
3351; CGP-NEXT:    v_trunc_f32_e32 v8, v8
3352; CGP-NEXT:    v_trunc_f32_e32 v9, v9
3353; CGP-NEXT:    v_mad_f32 v4, -v8, v5, v4
3354; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
3355; CGP-NEXT:    v_mad_f32 v6, -v9, v7, v6
3356; CGP-NEXT:    v_cvt_u32_f32_e32 v9, v9
3357; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v4|, v5
3358; CGP-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s[4:5]
3359; CGP-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v6|, v7
3360; CGP-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s[4:5]
3361; CGP-NEXT:    v_add_i32_e32 v4, vcc, v8, v4
3362; CGP-NEXT:    v_add_i32_e32 v5, vcc, v9, v5
3363; CGP-NEXT:    v_mul_lo_u32 v2, v4, v2
3364; CGP-NEXT:    v_mul_lo_u32 v3, v5, v3
3365; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
3366; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
3367; CGP-NEXT:    v_and_b32_e32 v0, s6, v0
3368; CGP-NEXT:    v_and_b32_e32 v2, s6, v1
3369; CGP-NEXT:    v_mov_b32_e32 v1, 0
3370; CGP-NEXT:    v_mov_b32_e32 v3, 0
3371; CGP-NEXT:    s_setpc_b64 s[30:31]
3372  %num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215>
3373  %den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215>
3374  %result = urem <2 x i64> %num.mask, %den.mask
3375  ret <2 x i64> %result
3376}
3377