1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2018 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 
28 #include "sfn_emitaluinstruction.h"
29 #include "sfn_debug.h"
30 
31 #include "gallium/drivers/r600/r600_shader.h"
32 
33 namespace r600 {
34 
35 using std::vector;
36 
EmitAluInstruction(ShaderFromNirProcessor & processor)37 EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor& processor):
38    EmitInstruction (processor)
39 {
40 
41 }
42 
do_emit(nir_instr * ir)43 bool EmitAluInstruction::do_emit(nir_instr* ir)
44 {
45    const nir_alu_instr& instr = *nir_instr_as_alu(ir);
46 
47    r600::sfn_log << SfnLog::instr << "emit '"
48                  << *ir
49                  << " bitsize: " << static_cast<int>(instr.dest.dest.ssa.bit_size)
50                  << "' (" << __func__ << ")\n";
51 
52    preload_src(instr);
53 
54    switch (instr.op) {
55    case nir_op_f2b32: return emit_alu_f2b32(instr);
56    case nir_op_b2f32: return emit_alu_b2f(instr);
57    case nir_op_i2b1: return emit_alu_i2orf2_b1(instr, op2_setne_int);
58    case nir_op_i2b32: return emit_alu_i2orf2_b1(instr, op2_setne_int);
59    case nir_op_f2b1: return emit_alu_i2orf2_b1(instr, op2_setne_dx10);
60    case nir_op_b2b1:
61    case nir_op_b2b32:
62    case nir_op_mov:return emit_mov(instr);
63    case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc);
64    case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs});
65    case nir_op_fneg: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_neg});
66    case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp});
67    case nir_op_frcp: return emit_alu_trans_op1(instr, op1_recip_ieee);
68    case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1);
69    case nir_op_fsin: return emit_alu_trig_op1(instr, op1_sin);
70    case nir_op_fcos: return emit_alu_trig_op1(instr, op1_cos);
71    case nir_op_fexp2: return emit_alu_trans_op1(instr, op1_exp_ieee);
72    case nir_op_flog2: return emit_alu_trans_op1(instr, op1_log_clamped);
73 
74    case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne);
75    case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee);
76    case nir_op_i2f32: return emit_alu_trans_op1(instr, op1_int_to_flt);
77    case nir_op_u2f32: return emit_alu_trans_op1(instr, op1_uint_to_flt);
78    case nir_op_f2i32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_int);
79    case nir_op_f2u32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_uint);
80 
81    case nir_op_fceil: return emit_alu_op1(instr, op1_ceil);
82    case nir_op_ffract: return emit_alu_op1(instr, op1_fract);
83    case nir_op_ffloor: return emit_alu_op1(instr, op1_floor);
84 
85    case nir_op_fsign: return emit_fsign(instr);
86    case nir_op_fdph:  return emit_fdph(instr);
87 
88    case nir_op_ibitfield_extract: return emit_bitfield_extract(instr, op3_bfe_int);
89    case nir_op_ubitfield_extract: return emit_bitfield_extract(instr, op3_bfe_uint);
90    case nir_op_bitfield_insert: return emit_bitfield_insert(instr);
91    case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int);
92    case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int);
93 
94    case nir_op_ieq32:
95    case nir_op_ieq: return emit_alu_op2_int(instr, op2_sete_int);
96 
97    case nir_op_ine32:
98    case nir_op_ine: return emit_alu_op2_int(instr, op2_setne_int);
99    case nir_op_uge32:
100    case nir_op_uge: return emit_alu_op2_int(instr, op2_setge_uint);
101    case nir_op_ige32:
102    case nir_op_ige: return emit_alu_op2_int(instr, op2_setge_int);
103    case nir_op_ishl: return emit_alu_op2_int(instr, op2_lshl_int);
104    case nir_op_ishr: return emit_alu_op2_int(instr, op2_ashr_int);
105 
106    case nir_op_ult32:
107    case nir_op_ult: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse);
108 
109    case nir_op_ilt32:
110    case nir_op_ilt: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse);
111    case nir_op_iand: return emit_alu_op2_int(instr, op2_and_int);
112    case nir_op_ixor: return emit_alu_op2_int(instr, op2_xor_int);
113    case nir_op_imin: return emit_alu_op2_int(instr, op2_min_int);
114    case nir_op_imax: return emit_alu_op2_int(instr, op2_max_int);
115    case nir_op_imul_high: return emit_alu_trans_op2(instr, op2_mulhi_int);
116    case nir_op_umul_high: return emit_alu_trans_op2(instr, op2_mulhi_uint);
117    case nir_op_umax: return emit_alu_op2_int(instr, op2_max_uint);
118    case nir_op_umin: return emit_alu_op2_int(instr, op2_min_uint);
119    case nir_op_ior: return emit_alu_op2_int(instr, op2_or_int);
120    case nir_op_inot: return emit_alu_op1(instr, op1_not_int);
121    case nir_op_iabs: return emit_alu_iabs(instr);
122    case nir_op_ineg: return emit_alu_ineg(instr);
123    case nir_op_idiv: return emit_alu_div_int(instr, true, false);
124    case nir_op_udiv: return emit_alu_div_int(instr, false, false);
125    case nir_op_umod: return emit_alu_div_int(instr, false, true);
126    case nir_op_isign: return emit_alu_isign(instr);
127 
128    case nir_op_ushr: return emit_alu_op2_int(instr, op2_lshr_int);
129 
130    case nir_op_flt32:
131    case nir_op_flt: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse);
132 
133    case nir_op_fge32:
134    case nir_op_fge: return emit_alu_op2(instr, op2_setge_dx10);
135    case nir_op_fneu32:
136    case nir_op_fneu: return emit_alu_op2(instr, op2_setne_dx10);
137    case nir_op_feq32:
138    case nir_op_feq: return emit_alu_op2(instr, op2_sete_dx10);
139 
140    case nir_op_fmin: return emit_alu_op2(instr, op2_min_dx10);
141    case nir_op_fmax: return emit_alu_op2(instr, op2_max_dx10);
142    case nir_op_fmul: return emit_alu_op2(instr, op2_mul_ieee);
143    case nir_op_imul: return emit_alu_trans_op2(instr, op2_mullo_int);
144    case nir_op_fadd: return emit_alu_op2(instr, op2_add);
145    case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1);
146    case nir_op_iadd: return emit_alu_op2_int(instr, op2_add_int);
147    case nir_op_isub: return emit_alu_op2_int(instr, op2_sub_int);
148    case nir_op_fdot2: return emit_dot(instr, 2);
149    case nir_op_fdot3: return emit_dot(instr, 3);
150    case nir_op_fdot4: return emit_dot(instr, 4);
151 
152    case nir_op_bany_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
153    case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
154    case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
155 
156    case nir_op_ball_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
157    case nir_op_ball_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
158    case nir_op_ball_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
159 
160    case nir_op_bany_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
161    case nir_op_bany_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
162    case nir_op_bany_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
163 
164    case nir_op_ball_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
165    case nir_op_ball_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
166    case nir_op_ball_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
167 
168 
169    case nir_op_ffma: return emit_alu_op3(instr, op3_muladd_ieee);
170    case nir_op_b32csel: return emit_alu_op3(instr, op3_cnde,  {0, 2, 1});
171    case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde,  {0, 2, 1});
172    case nir_op_vec2: return emit_create_vec(instr, 2);
173    case nir_op_vec3: return emit_create_vec(instr, 3);
174    case nir_op_vec4: return emit_create_vec(instr, 4);
175 
176    case nir_op_find_lsb: return emit_alu_op1(instr, op1_ffbl_int);
177    case nir_op_ufind_msb: return emit_find_msb(instr, false);
178    case nir_op_ifind_msb: return emit_find_msb(instr, true);
179    case nir_op_b2i32: return emit_b2i32(instr);
180    case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(instr);
181    case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(instr, 0);
182    case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(instr, 1);
183    case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(instr);
184    case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(instr);
185    case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(instr);
186 
187 
188    /* These are in the ALU instruction list, but they should be texture instructions */
189    case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true);
190    case nir_op_fddx_coarse:
191    case nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
192 
193    case nir_op_fddy_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_v,  true);
194    case nir_op_fddy_coarse:
195    case nir_op_fddy: return emit_tex_fdd(instr,TexInstruction::get_gradient_v, false);
196 
197    case nir_op_umad24: return emit_alu_op3(instr, op3_muladd_uint24,  {0, 1, 2});
198    case nir_op_umul24: return emit_alu_op2(instr, op2_mul_uint24);
199    default:
200       return false;
201    }
202 }
203 
preload_src(const nir_alu_instr & instr)204 void EmitAluInstruction::preload_src(const nir_alu_instr& instr)
205 {
206    const nir_op_info *op_info = &nir_op_infos[instr.op];
207    assert(op_info->num_inputs <= 4);
208 
209    unsigned nsrc_comp = num_src_comp(instr);
210    sfn_log << SfnLog::reg << "Preload:\n";
211    for (unsigned i = 0; i < op_info->num_inputs; ++i) {
212       for (unsigned c = 0; c < nsrc_comp; ++c) {
213          m_src[i][c] = from_nir(instr.src[i], c);
214          sfn_log << SfnLog::reg << " " << *m_src[i][c];
215 
216       }
217       sfn_log << SfnLog::reg << "\n";
218    }
219    if (instr.op == nir_op_fdph) {
220       m_src[1][3] = from_nir(instr.src[1], 3);
221       sfn_log << SfnLog::reg << " extra:" << *m_src[1][3] << "\n";
222    }
223 
224    split_constants(instr, nsrc_comp);
225 }
226 
num_src_comp(const nir_alu_instr & instr)227 unsigned EmitAluInstruction::num_src_comp(const nir_alu_instr& instr)
228 {
229    switch (instr.op) {
230    case nir_op_fdot2:
231    case nir_op_bany_inequal2:
232    case nir_op_ball_iequal2:
233    case nir_op_bany_fnequal2:
234    case nir_op_ball_fequal2:
235       return 2;
236 
237    case nir_op_fdot3:
238    case nir_op_bany_inequal3:
239    case nir_op_ball_iequal3:
240    case nir_op_bany_fnequal3:
241    case nir_op_ball_fequal3:
242       return 3;
243 
244    case nir_op_fdot4:
245    case nir_op_fdph:
246    case nir_op_bany_inequal4:
247    case nir_op_ball_iequal4:
248    case nir_op_bany_fnequal4:
249    case nir_op_ball_fequal4:
250       return 4;
251 
252    case nir_op_vec2:
253    case nir_op_vec3:
254    case nir_op_vec4:
255       return 1;
256 
257    default:
258       return nir_dest_num_components(instr.dest.dest);
259 
260    }
261 }
262 
263 
264 
split_constants(const nir_alu_instr & instr,unsigned nsrc_comp)265 void EmitAluInstruction::split_constants(const nir_alu_instr& instr, unsigned nsrc_comp)
266 {
267     const nir_op_info *op_info = &nir_op_infos[instr.op];
268     if (op_info->num_inputs < 2)
269        return;
270 
271     int nconst = 0;
272     std::array<const UniformValue *,4> c;
273     std::array<int,4> idx;
274     for (unsigned i = 0; i < op_info->num_inputs; ++i) {
275        PValue& src = m_src[i][0];
276        assert(src);
277        sfn_log << SfnLog::reg << "Split test " << *src;
278 
279        if (src->type() == Value::kconst) {
280           c[nconst] = static_cast<const UniformValue *>(src.get());
281           idx[nconst++] = i;
282           sfn_log << SfnLog::reg << " is constant " << i;
283        }
284        sfn_log << SfnLog::reg << "\n";
285     }
286 
287     if (nconst < 2)
288        return;
289 
290     unsigned sel = c[0]->sel();
291     unsigned kcache =  c[0]->kcache_bank();
292     sfn_log << SfnLog::reg << "split " << nconst << " constants, sel[0] = " << sel; ;
293 
294     for (int i = 1; i < nconst; ++i) {
295        sfn_log << "sel[" << i << "] = " <<  c[i]->sel() << "\n";
296        if (c[i]->sel() != sel || c[i]->kcache_bank() != kcache) {
297           AluInstruction *ir = nullptr;
298           auto v = get_temp_vec4();
299           for (unsigned k = 0; k < nsrc_comp; ++k) {
300              ir = new AluInstruction(op1_mov, v[k], m_src[idx[i]][k], {write});
301              emit_instruction(ir);
302              m_src[idx[i]][k] = v[k];
303           }
304           make_last(ir);
305        }
306     }
307 }
308 
emit_alu_inot(const nir_alu_instr & instr)309 bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr& instr)
310 {
311    if (instr.src[0].negate || instr.src[0].abs) {
312       std::cerr << "source modifiers not supported with int ops\n";
313       return false;
314    }
315 
316    AluInstruction *ir = nullptr;
317    for (int i = 0; i < 4 ; ++i) {
318       if (instr.dest.write_mask & (1 << i)){
319          ir = new AluInstruction(op1_not_int, from_nir(instr.dest, i),
320                                  m_src[0][i], write);
321          emit_instruction(ir);
322       }
323    }
324    make_last(ir);
325    return true;
326 }
327 
emit_alu_op1(const nir_alu_instr & instr,EAluOp opcode,const AluOpFlags & flags)328 bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode,
329                                       const AluOpFlags& flags)
330 {
331    AluInstruction *ir = nullptr;
332    for (int i = 0; i < 4 ; ++i) {
333       if (instr.dest.write_mask & (1 << i)){
334          ir = new AluInstruction(opcode, from_nir(instr.dest, i),
335                                  m_src[0][i], write);
336 
337          if (flags.test(alu_src0_abs) || instr.src[0].abs)
338             ir->set_flag(alu_src0_abs);
339 
340          if (instr.src[0].negate ^ flags.test(alu_src0_neg))
341             ir->set_flag(alu_src0_neg);
342 
343          if (flags.test(alu_dst_clamp) || instr.dest.saturate)
344              ir->set_flag(alu_dst_clamp);
345 
346          emit_instruction(ir);
347       }
348    }
349    make_last(ir);
350 
351    return true;
352 }
353 
emit_mov(const nir_alu_instr & instr)354 bool EmitAluInstruction::emit_mov(const nir_alu_instr& instr)
355 {
356    /* If the op is a plain move beween SSA values we can just forward
357     * the register reference to the original register */
358    if (instr.dest.dest.is_ssa && instr.src[0].src.is_ssa &&
359        !instr.src[0].abs && !instr.src[0].negate  && !instr.dest.saturate) {
360       bool result = true;
361       for (int i = 0; i < 4 ; ++i) {
362          if (instr.dest.write_mask & (1 << i)){
363             result &= inject_register(instr.dest.dest.ssa.index, i,
364                                       m_src[0][i], true);
365          }
366       }
367       return result;
368    } else {
369       return emit_alu_op1(instr, op1_mov);
370    }
371 }
372 
emit_alu_trig_op1(const nir_alu_instr & instr,EAluOp opcode)373 bool EmitAluInstruction::emit_alu_trig_op1(const nir_alu_instr& instr, EAluOp opcode)
374 {
375    // normalize by dividing by 2*PI, shift by 0.5, take fraction, and
376    // then shift back
377 
378    const float inv_2_pi = 0.15915494f;
379 
380    PValue v[4]; // this might need some additional temp register creation
381    for (unsigned i = 0; i < 4 ; ++i)
382       v[i] = from_nir(instr.dest, i);
383 
384    PValue inv_pihalf = PValue(new LiteralValue(inv_2_pi, 0));
385    AluInstruction *ir = nullptr;
386    for (unsigned i = 0; i < 4 ; ++i) {
387       if (!(instr.dest.write_mask & (1 << i)))
388          continue;
389       ir = new AluInstruction(op3_muladd_ieee, v[i],
390                               {m_src[0][i], inv_pihalf, Value::zero_dot_5},
391                               {alu_write});
392       if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
393       emit_instruction(ir);
394    }
395    make_last(ir);
396 
397    for (unsigned i = 0; i < 4 ; ++i) {
398       if (!(instr.dest.write_mask & (1 << i)))
399          continue;
400       ir = new AluInstruction(op1_fract, v[i], v[i], {alu_write});
401       emit_instruction(ir);
402    }
403    make_last(ir);
404 
405    for (unsigned i = 0; i < 4 ; ++i) {
406       if (!(instr.dest.write_mask & (1 << i)))
407          continue;
408       ir = new AluInstruction(op2_add, v[i], v[i], Value::zero_dot_5, write);
409       ir->set_flag(alu_src1_neg);
410       emit_instruction(ir);
411    }
412    make_last(ir);
413 
414    for (unsigned i = 0; i < 4 ; ++i) {
415       if (!(instr.dest.write_mask & (1 << i)))
416          continue;
417 
418       ir = new AluInstruction(opcode, v[i], v[i], last_write);
419       emit_instruction(ir);
420    }
421    return true;
422 }
423 
emit_alu_trans_op1(const nir_alu_instr & instr,EAluOp opcode,bool absolute)424 bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode,
425                                             bool absolute)
426 {
427    AluInstruction *ir = nullptr;
428    std::set<int> src_idx;
429 
430    if (get_chip_class() == CAYMAN) {
431       int last_slot = (instr.dest.write_mask & 0x8) ? 4 : 3;
432       for (int i = 0; i < last_slot; ++i) {
433          ir = new AluInstruction(opcode, from_nir(instr.dest, i),
434                                  m_src[0][0], instr.dest.write_mask & (1 << i) ? write : empty);
435          if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
436          if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
437          if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
438 
439          if (i == (last_slot - 1)) ir->set_flag(alu_last_instr);
440 
441          emit_instruction(ir);
442       }
443    } else {
444       for (int i = 0; i < 4 ; ++i) {
445          if (instr.dest.write_mask & (1 << i)){
446             ir = new AluInstruction(opcode, from_nir(instr.dest, i),
447                                     m_src[0][i], last_write);
448             if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
449             if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
450             if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
451             emit_instruction(ir);
452          }
453       }
454    }
455    return true;
456 }
457 
emit_alu_f2i32_or_u32(const nir_alu_instr & instr,EAluOp op)458 bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op)
459 {
460    AluInstruction *ir = nullptr;
461    std::array<PValue, 4> v;
462 
463    for (int i = 0; i < 4; ++i) {
464       if (!(instr.dest.write_mask & (1 << i)))
465          continue;
466       v[i] = from_nir(instr.dest, i);
467       ir = new AluInstruction(op1_trunc, v[i], m_src[0][i], {alu_write});
468       if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
469       if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
470       emit_instruction(ir);
471    }
472    make_last(ir);
473 
474    for (int i = 0; i < 4; ++i) {
475       if (!(instr.dest.write_mask & (1 << i)))
476          continue;
477       ir = new AluInstruction(op, v[i], v[i], {alu_write});
478       emit_instruction(ir);
479       if (op == op1_flt_to_uint)
480          make_last(ir);
481    }
482    make_last(ir);
483 
484    return true;
485 }
486 
emit_alu_f2b32(const nir_alu_instr & instr)487 bool EmitAluInstruction::emit_alu_f2b32(const nir_alu_instr& instr)
488 {
489    AluInstruction *ir = nullptr;
490    for (int i = 0; i < 4 ; ++i) {
491       if (instr.dest.write_mask & (1 << i)){
492          ir = new AluInstruction(op2_setne_dx10, from_nir(instr.dest, i),
493                                  m_src[0][i], literal(0.0f), write);
494          emit_instruction(ir);
495       }
496    }
497    make_last(ir);
498    return true;
499 }
500 
emit_find_msb(const nir_alu_instr & instr,bool sgn)501 bool EmitAluInstruction::emit_find_msb(const nir_alu_instr& instr, bool sgn)
502 {
503    int sel_tmp = allocate_temp_register();
504    int sel_tmp2 = allocate_temp_register();
505    GPRVector tmp(sel_tmp, {0,1,2,3});
506    GPRVector tmp2(sel_tmp2, {0,1,2,3});
507    AluInstruction *ir = nullptr;
508    EAluOp opcode = sgn ? op1_ffbh_int : op1_ffbh_uint;
509    for (int i = 0; i < 4; ++i) {
510       if (!(instr.dest.write_mask & (1 << i)))
511          continue;
512 
513       ir = new AluInstruction(opcode, tmp.reg_i(i), m_src[0][i], write);
514       emit_instruction(ir);
515    }
516    make_last(ir);
517 
518    for (int i = 0; i < 4 ; ++i) {
519       if (!(instr.dest.write_mask & (1 << i)))
520          continue;
521 
522       ir = new AluInstruction(op2_sub_int, tmp2.reg_i(i),
523                               PValue(new LiteralValue(31u, 0)), tmp.reg_i(i), write);
524       emit_instruction(ir);
525    }
526    make_last(ir);
527 
528    for (int i = 0; i < 4 ; ++i) {
529       if (!(instr.dest.write_mask & (1 << i)))
530          continue;
531 
532       ir = new AluInstruction(op3_cndge_int, from_nir(instr.dest, i), tmp.reg_i(i),
533                               tmp2.reg_i(i), tmp.reg_i(i), write);
534       emit_instruction(ir);
535    }
536    make_last(ir);
537 
538    return true;
539 }
540 
emit_b2i32(const nir_alu_instr & instr)541 bool EmitAluInstruction::emit_b2i32(const nir_alu_instr& instr)
542 {
543    AluInstruction *ir = nullptr;
544    for (int i = 0; i < 4 ; ++i) {
545       if (!(instr.dest.write_mask & (1 << i)))
546          continue;
547 
548       ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
549                               m_src[0][i], Value::one_i, write);
550      emit_instruction(ir);
551    }
552    make_last(ir);
553 
554    return true;
555 }
556 
emit_pack_64_2x32_split(const nir_alu_instr & instr)557 bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr& instr)
558 {
559    AluInstruction *ir = nullptr;
560    for (unsigned i = 0; i < 2; ++i) {
561       if (!(instr.dest.write_mask & (1 << i)))
562          continue;
563      ir = new AluInstruction(op1_mov, from_nir(instr.dest, i),
564                              m_src[0][i], write);
565      emit_instruction(ir);
566    }
567    ir->set_flag(alu_last_instr);
568    return true;
569 }
570 
emit_unpack_64_2x32_split(const nir_alu_instr & instr,unsigned comp)571 bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp)
572 {
573    emit_instruction(new AluInstruction(op1_mov, from_nir(instr.dest, 0),
574                                        m_src[0][comp], last_write));
575    return true;
576 }
577 
emit_create_vec(const nir_alu_instr & instr,unsigned nc)578 bool EmitAluInstruction::emit_create_vec(const nir_alu_instr& instr, unsigned nc)
579 {
580    AluInstruction *ir = nullptr;
581    std::set<int> src_slot;
582    for(unsigned i = 0; i < nc; ++i) {
583       if (instr.dest.write_mask & (1 << i)){
584          auto src = m_src[i][0];
585          ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), src, write);
586          if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
587 
588          // FIXME: This is a rather crude approach to fix the problem that
589          // r600 can't read from four different slots of the same component
590          // here we check only for the register index
591          if (src->type() == Value::gpr)
592             src_slot.insert(src->sel());
593          if (src_slot.size() >= 3) {
594             src_slot.clear();
595             ir->set_flag(alu_last_instr);
596          }
597          emit_instruction(ir);
598       }
599    }
600    if (ir)
601       ir->set_flag(alu_last_instr);
602    return true;
603 }
604 
emit_dot(const nir_alu_instr & instr,int n)605 bool EmitAluInstruction::emit_dot(const nir_alu_instr& instr, int n)
606 {
607    const nir_alu_src& src0 = instr.src[0];
608    const nir_alu_src& src1 = instr.src[1];
609 
610    AluInstruction *ir = nullptr;
611    for (int i = 0; i < n ; ++i) {
612       ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
613                               m_src[0][i], m_src[1][i],
614                               instr.dest.write_mask & (1 << i) ? write : empty);
615 
616       if (src0.negate) ir->set_flag(alu_src0_neg);
617       if (src0.abs) ir->set_flag(alu_src0_abs);
618       if (src1.negate) ir->set_flag(alu_src1_neg);
619       if (src1.abs) ir->set_flag(alu_src1_abs);
620 
621       if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
622       emit_instruction(ir);
623    }
624    for (int i = n; i < 4 ; ++i) {
625       ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
626                               Value::zero, Value::zero,
627                               instr.dest.write_mask & (1 << i) ? write : empty);
628       emit_instruction(ir);
629    }
630 
631    if (ir)
632       ir->set_flag(alu_last_instr);
633    return true;
634 }
635 
emit_fdph(const nir_alu_instr & instr)636 bool EmitAluInstruction::emit_fdph(const nir_alu_instr& instr)
637 {
638    const nir_alu_src& src0 = instr.src[0];
639    const nir_alu_src& src1 = instr.src[1];
640 
641    AluInstruction *ir = nullptr;
642    for (int i = 0; i < 3 ; ++i) {
643       ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
644                               m_src[0][i], m_src[1][i],
645                               instr.dest.write_mask & (1 << i) ? write : empty);
646       if (src0.negate) ir->set_flag(alu_src0_neg);
647       if (src0.abs) ir->set_flag(alu_src0_abs);
648       if (src1.negate) ir->set_flag(alu_src1_neg);
649       if (src1.abs) ir->set_flag(alu_src1_abs);
650       if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
651       emit_instruction(ir);
652    }
653 
654    ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, 3), Value::one_f,
655                            m_src[1][3], (instr.dest.write_mask) & (1 << 3) ? write : empty);
656    if (src1.negate) ir->set_flag(alu_src1_neg);
657    if (src1.abs) ir->set_flag(alu_src1_abs);
658    emit_instruction(ir);
659 
660    ir->set_flag(alu_last_instr);
661    return true;
662 
663 }
664 
emit_alu_i2orf2_b1(const nir_alu_instr & instr,EAluOp op)665 bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op)
666 {
667    AluInstruction *ir = nullptr;
668    for (int i = 0; i < 4 ; ++i) {
669       if (instr.dest.write_mask & (1 << i)) {
670          ir = new AluInstruction(op, from_nir(instr.dest, i),
671                                  m_src[0][i], Value::zero,
672                                  write);
673          emit_instruction(ir);
674       }
675    }
676    if (ir)
677       ir->set_flag(alu_last_instr);
678    return true;
679 }
680 
emit_alu_b2f(const nir_alu_instr & instr)681 bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr& instr)
682 {
683    AluInstruction *ir = nullptr;
684    for (int i = 0; i < 4 ; ++i) {
685       if (instr.dest.write_mask & (1 << i)){
686          ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
687                                  m_src[0][i], Value::one_f, write);
688          if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
689          if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
690          if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
691          emit_instruction(ir);
692       }
693    }
694    if (ir)
695       ir->set_flag(alu_last_instr);
696    return true;
697 }
698 
emit_any_all_icomp(const nir_alu_instr & instr,EAluOp op,unsigned nc,bool all)699 bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
700 {
701 
702    AluInstruction *ir = nullptr;
703    PValue v[4]; // this might need some additional temp register creation
704    for (unsigned i = 0; i < 4 ; ++i)
705       v[i] = from_nir(instr.dest, i);
706 
707    EAluOp combine = all ? op2_and_int : op2_or_int;
708 
709    /* For integers we can not use the modifiers, so this needs some emulation */
710    /* Should actually be lowered with NIR */
711    if (instr.src[0].negate == instr.src[1].negate &&
712        instr.src[0].abs == instr.src[1].abs) {
713 
714       for (unsigned i = 0; i < nc ; ++i) {
715          ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
716          emit_instruction(ir);
717       }
718       if (ir)
719          ir->set_flag(alu_last_instr);
720    } else {
721       std::cerr << "Negate in iequal/inequal not (yet) supported\n";
722       return false;
723    }
724 
725    for (unsigned i = 0; i < nc/2 ; ++i) {
726       ir = new AluInstruction(combine, v[2 * i], v[2 * i], v[2 * i + 1], write);
727       emit_instruction(ir);
728    }
729    if (ir)
730       ir->set_flag(alu_last_instr);
731 
732    if (nc > 2) {
733       ir = new AluInstruction(combine, v[0], v[0], v[2], last_write);
734       emit_instruction(ir);
735    }
736 
737    return true;
738 }
739 
emit_any_all_fcomp(const nir_alu_instr & instr,EAluOp op,unsigned nc,bool all)740 bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
741 {
742    AluInstruction *ir = nullptr;
743    PValue v[4]; // this might need some additional temp register creation
744    for (unsigned i = 0; i < 4 ; ++i)
745       v[i] = from_nir(instr.dest, i);
746 
747    for (unsigned i = 0; i < nc ; ++i) {
748       ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
749 
750       if (instr.src[0].abs)
751          ir->set_flag(alu_src0_abs);
752       if (instr.src[0].negate)
753          ir->set_flag(alu_src0_neg);
754 
755       if (instr.src[1].abs)
756          ir->set_flag(alu_src1_abs);
757       if (instr.src[1].negate)
758          ir->set_flag(alu_src1_neg);
759 
760       emit_instruction(ir);
761    }
762    if (ir)
763       ir->set_flag(alu_last_instr);
764 
765    for (unsigned i = 0; i < nc ; ++i) {
766       ir = new AluInstruction(op1_max4, v[i], v[i], write);
767       if (all) ir->set_flag(alu_src0_neg);
768       emit_instruction(ir);
769    }
770 
771    for (unsigned i = nc; i < 4 ; ++i) {
772       ir = new AluInstruction(op1_max4, v[i],
773                               all ? Value::one_f : Value::zero, write);
774       if (all)
775          ir->set_flag(alu_src0_neg);
776 
777       emit_instruction(ir);
778    }
779 
780    ir->set_flag(alu_last_instr);
781 
782    if (all)
783       op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10;
784    else
785       op = (op == op2_sete) ? op2_setne_dx10: op2_sete_dx10;
786 
787    ir = new AluInstruction(op, v[0], v[0], Value::one_f, last_write);
788    if (all)
789       ir->set_flag(alu_src1_neg);
790    emit_instruction(ir);
791 
792    return true;
793 }
794 
emit_any_all_fcomp2(const nir_alu_instr & instr,EAluOp op,bool all)795 bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all)
796 {
797    AluInstruction *ir = nullptr;
798    PValue v[4]; // this might need some additional temp register creation
799    for (unsigned i = 0; i < 4 ; ++i)
800       v[i] = from_nir(instr.dest, i);
801 
802    for (unsigned i = 0; i < 2 ; ++i) {
803       ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
804       if (instr.src[0].abs)
805          ir->set_flag(alu_src0_abs);
806       if (instr.src[0].negate)
807          ir->set_flag(alu_src0_neg);
808 
809       if (instr.src[1].abs)
810          ir->set_flag(alu_src1_abs);
811       if (instr.src[1].negate)
812          ir->set_flag(alu_src1_neg);
813 
814       emit_instruction(ir);
815    }
816    if (ir)
817       ir->set_flag(alu_last_instr);
818 
819    op = (op == op2_setne_dx10) ? op2_or_int: op2_and_int;
820    ir = new AluInstruction(op, v[0], v[0], v[1], last_write);
821    emit_instruction(ir);
822 
823    return true;
824 }
825 
emit_alu_trans_op2(const nir_alu_instr & instr,EAluOp opcode)826 bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode)
827 {
828    const nir_alu_src& src0 = instr.src[0];
829    const nir_alu_src& src1 = instr.src[1];
830 
831    AluInstruction *ir = nullptr;
832 
833    if (get_chip_class() == CAYMAN) {
834       int lasti = util_last_bit(instr.dest.write_mask);
835       for (int k = 0; k < lasti ; ++k) {
836          if (instr.dest.write_mask & (1 << k)) {
837 
838             for (int i = 0; i < 4; i++) {
839                ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][k], m_src[0][k], (i == k) ? write : empty);
840                if (src0.negate) ir->set_flag(alu_src0_neg);
841                if (src0.abs) ir->set_flag(alu_src0_abs);
842                if (src1.negate) ir->set_flag(alu_src1_neg);
843                if (src1.abs) ir->set_flag(alu_src1_abs);
844                if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
845                if (i == 3) ir->set_flag(alu_last_instr);
846                emit_instruction(ir);
847             }
848          }
849       }
850    } else {
851       for (int i = 0; i < 4 ; ++i) {
852          if (instr.dest.write_mask & (1 << i)){
853             ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][i], m_src[1][i], last_write);
854             if (src0.negate) ir->set_flag(alu_src0_neg);
855             if (src0.abs) ir->set_flag(alu_src0_abs);
856             if (src1.negate) ir->set_flag(alu_src1_neg);
857             if (src1.abs) ir->set_flag(alu_src1_abs);
858             if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
859             emit_instruction(ir);
860          }
861       }
862    }
863    return true;
864 }
865 
emit_alu_op2_int(const nir_alu_instr & instr,EAluOp opcode,AluOp2Opts opts)866 bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts opts)
867 {
868 
869    const nir_alu_src& src0 = instr.src[0];
870    const nir_alu_src& src1 = instr.src[1];
871 
872    if (src0.negate || src1.negate ||
873        src0.abs || src1.abs) {
874       std::cerr << "R600: don't support modifiers with integer operations";
875       return false;
876    }
877    return emit_alu_op2(instr, opcode, opts);
878 }
879 
emit_alu_op2(const nir_alu_instr & instr,EAluOp opcode,AluOp2Opts ops)880 bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
881 {
882    const nir_alu_src *src0 = &instr.src[0];
883    const nir_alu_src *src1 = &instr.src[1];
884 
885    int idx0 = 0;
886    int idx1 = 1;
887    if (ops & op2_opt_reverse) {
888       std::swap(src0, src1);
889       std::swap(idx0, idx1);
890    }
891 
892    bool src1_negate = (ops & op2_opt_neg_src1) ^ src1->negate;
893 
894    AluInstruction *ir = nullptr;
895    for (int i = 0; i < 4 ; ++i) {
896       if (instr.dest.write_mask & (1 << i)){
897          ir = new AluInstruction(opcode, from_nir(instr.dest, i),
898                                  m_src[idx0][i], m_src[idx1][i], write);
899 
900          if (src0->negate) ir->set_flag(alu_src0_neg);
901          if (src0->abs) ir->set_flag(alu_src0_abs);
902          if (src1_negate) ir->set_flag(alu_src1_neg);
903          if (src1->abs) ir->set_flag(alu_src1_abs);
904          if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
905          emit_instruction(ir);
906       }
907    }
908    if (ir)
909       ir->set_flag(alu_last_instr);
910    return true;
911 }
912 
emit_alu_op2_split_src_mods(const nir_alu_instr & instr,EAluOp opcode,AluOp2Opts ops)913 bool EmitAluInstruction::emit_alu_op2_split_src_mods(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
914 {
915    const nir_alu_src *src0 = &instr.src[0];
916    const nir_alu_src *src1 = &instr.src[1];
917 
918    if (ops & op2_opt_reverse)
919       std::swap(src0, src1);
920 
921    GPRVector::Values v0;
922    for (int i = 0; i < 4 ; ++i)
923       v0[i] = m_src[0][i];
924 
925    GPRVector::Values v1;
926    for (int i = 0; i < 4 ; ++i)
927       v1[i] = m_src[1][i];
928 
929    if (src0->abs ||   src0->negate) {
930       int src0_tmp = allocate_temp_register();
931       GPRVector::Values v0_temp;
932       AluInstruction *ir = nullptr;
933       for (int i = 0; i < 4 ; ++i) {
934          if (instr.dest.write_mask & (1 << i)) {
935             v0_temp[i] = PValue(new GPRValue(src0_tmp, i));
936             ir = new AluInstruction(op1_mov, v0_temp[i], v0[i], write);
937             if (src0->abs) ir->set_flag(alu_src0_abs);
938             if (src0->negate) ir->set_flag(alu_src0_neg);
939             emit_instruction(ir);
940             v0[i] = v0_temp[i];
941          }
942       }
943       if (ir)
944          ir->set_flag(alu_last_instr);
945    }
946 
947    if (src1->abs || src1->negate) {
948       int src1_tmp = allocate_temp_register();
949       GPRVector::Values v1_temp;
950       AluInstruction *ir = nullptr;
951       for (int i = 0; i < 4 ; ++i) {
952          if (instr.dest.write_mask & (1 << i)) {
953             v1_temp[i] = PValue(new GPRValue(src1_tmp, i));
954             ir = new AluInstruction(op1_mov, v1_temp[i], v1[i], {alu_write});
955             if (src1->abs) ir->set_flag(alu_src0_abs);
956             if (src1->negate) ir->set_flag(alu_src0_neg);
957             emit_instruction(ir);
958             v1[i] = v1_temp[i];
959          }
960       }
961       if (ir)
962          ir->set_flag(alu_last_instr);
963    }
964 
965    AluInstruction *ir = nullptr;
966    for (int i = 0; i < 4 ; ++i) {
967       if (instr.dest.write_mask & (1 << i)){
968          ir = new AluInstruction(opcode, from_nir(instr.dest, i), {v0[i], v1[i]}, {alu_write});
969          emit_instruction(ir);
970       }
971    }
972    if (ir)
973       ir->set_flag(alu_last_instr);
974    return true;
975 }
976 
977 
emit_alu_isign(const nir_alu_instr & instr)978 bool EmitAluInstruction::emit_alu_isign(const nir_alu_instr& instr)
979 {
980    int sel_tmp = allocate_temp_register();
981    GPRVector tmp(sel_tmp, {0,1,2,3});
982 
983    AluInstruction *ir = nullptr;
984    PValue help[4];
985 
986    for (int i = 0; i < 4 ; ++i) {
987       if (instr.dest.write_mask & (1 << i)){
988          help[i] = from_nir(instr.dest, i);
989          auto s = m_src[0][i];
990          ir = new AluInstruction(op3_cndgt_int, help[i], s, Value::one_i, s, write);
991          emit_instruction(ir);
992       }
993    }
994    if (ir)
995       ir->set_flag(alu_last_instr);
996 
997    for (int i = 0; i < 4 ; ++i) {
998       if (instr.dest.write_mask & (1 << i)){
999          ir = new AluInstruction(op2_sub_int, tmp.reg_i(i), Value::zero, help[i], write);
1000          emit_instruction(ir);
1001       }
1002    }
1003    if (ir)
1004       ir->set_flag(alu_last_instr);
1005 
1006    for (int i = 0; i < 4 ; ++i) {
1007       if (instr.dest.write_mask & (1 << i)){
1008 
1009          ir = new AluInstruction(op3_cndgt_int, help[i], tmp.reg_i(i),
1010                                  PValue(new LiteralValue(-1,0)), help[i], write);
1011          emit_instruction(ir);
1012       }
1013    }
1014    if (ir)
1015       ir->set_flag(alu_last_instr);
1016    return true;
1017 }
1018 
emit_fsign(const nir_alu_instr & instr)1019 bool EmitAluInstruction::emit_fsign(const nir_alu_instr& instr)
1020 {
1021    PValue help[4];
1022    PValue src[4];
1023    AluInstruction *ir = nullptr;
1024 
1025    for (int i = 0; i < 4 ; ++i) {
1026       help[i] = from_nir(instr.dest, i);
1027       src[i] = m_src[0][i];
1028    }
1029 
1030    if (instr.src[0].abs) {
1031 
1032       for (int i = 0; i < 4 ; ++i) {
1033          if (instr.dest.write_mask & (1 << i)){
1034             ir = new AluInstruction(op2_setgt, help[i], src[i], Value::zero, write);
1035             ir->set_flag(alu_src0_abs);
1036             emit_instruction(ir);
1037          }
1038       }
1039       if (ir)
1040          ir->set_flag(alu_last_instr);
1041 
1042       if (instr.src[0].negate) {
1043          for (int i = 0; i < 4 ; ++i) {
1044             if (instr.dest.write_mask & (1 << i)){
1045                ir = new AluInstruction(op1_mov, help[i], help[i], write);
1046                ir->set_flag(alu_src0_neg);
1047                emit_instruction(ir);
1048             }
1049          }
1050          if (ir)
1051             ir->set_flag(alu_last_instr);
1052       }
1053 
1054       return true;
1055    }
1056 
1057    for (int i = 0; i < 4 ; ++i) {
1058       if (instr.dest.write_mask & (1 << i)){
1059          ir = new AluInstruction(op3_cndgt, help[i], src[i], Value::one_f, src[i], write);
1060          if (instr.src[0].negate) {
1061             ir->set_flag(alu_src0_neg);
1062             ir->set_flag(alu_src2_neg);
1063          }
1064          emit_instruction(ir);
1065       }
1066    }
1067 
1068    if (ir)
1069       ir->set_flag(alu_last_instr);
1070 
1071    for (int i = 0; i < 4 ; ++i) {
1072       if (instr.dest.write_mask & (1 << i)){
1073          ir = new AluInstruction(op3_cndgt, help[i], help[i], Value::one_f, help[i], write);
1074          ir->set_flag(alu_src0_neg);
1075          ir->set_flag(alu_src1_neg);
1076          emit_instruction(ir);
1077       }
1078    }
1079    if (ir)
1080       ir->set_flag(alu_last_instr);
1081    return true;
1082 }
1083 
emit_alu_op3(const nir_alu_instr & instr,EAluOp opcode,std::array<uint8_t,3> reorder)1084 bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode,
1085                                       std::array<uint8_t, 3> reorder)
1086 {
1087    const nir_alu_src *src[3];
1088    src[0] = &instr.src[reorder[0]];
1089    src[1] = &instr.src[reorder[1]];
1090    src[2] = &instr.src[reorder[2]];
1091 
1092    AluInstruction *ir = nullptr;
1093    for (int i = 0; i < 4 ; ++i) {
1094       if (instr.dest.write_mask & (1 << i)){
1095          ir = new AluInstruction(opcode, from_nir(instr.dest, i),
1096                                  m_src[reorder[0]][i],
1097                                  m_src[reorder[1]][i],
1098                                  m_src[reorder[2]][i],
1099                write);
1100 
1101          if (src[0]->negate) ir->set_flag(alu_src0_neg);
1102          if (src[1]->negate) ir->set_flag(alu_src1_neg);
1103          if (src[2]->negate) ir->set_flag(alu_src2_neg);
1104 
1105          if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
1106          ir->set_flag(alu_write);
1107          emit_instruction(ir);
1108       }
1109    }
1110    make_last(ir);
1111    return true;
1112 }
1113 
emit_alu_ineg(const nir_alu_instr & instr)1114 bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr& instr)
1115 {
1116    AluInstruction *ir = nullptr;
1117    for (int i = 0; i < 4 ; ++i) {
1118       if (instr.dest.write_mask & (1 << i)){
1119          ir = new AluInstruction(op2_sub_int, from_nir(instr.dest, i), Value::zero,
1120                                  m_src[0][i], write);
1121          emit_instruction(ir);
1122       }
1123    }
1124    if (ir)
1125       ir->set_flag(alu_last_instr);
1126 
1127    return true;
1128 }
1129 
/* Printable names for swizzle selectors: positions 0-3 hold the component
 * letters x, y, z and w, positions 4-7 hold '0', '1', '?' and '_'
 * (presumably constant zero, constant one, unknown and unused/masked --
 * TODO confirm against the users of this table). */
static const char swz[] = "xyzw01?_";
1131 
1132 
1133 
emit_alu_iabs(const nir_alu_instr & instr)1134 bool EmitAluInstruction::emit_alu_iabs(const nir_alu_instr& instr)
1135 {
1136    int sel_tmp = allocate_temp_register();
1137    GPRVector tmp(sel_tmp, {0,1,2,3});
1138 
1139    std::array<PValue,4> src;
1140    AluInstruction *ir = nullptr;
1141    for (int i = 0; i < 4 ; ++i) {
1142       if (instr.dest.write_mask & (1 << i)){
1143          ir = new AluInstruction(op2_sub_int, tmp.reg_i(i), Value::zero, m_src[0][i], write);
1144          emit_instruction(ir);
1145       }
1146    }
1147    make_last(ir);
1148 
1149    for (int i = 0; i < 4 ; ++i) {
1150       if (instr.dest.write_mask & (1 << i)){
1151          ir = new AluInstruction(op3_cndge_int, from_nir(instr.dest, i), m_src[0][i],
1152                                  m_src[0][i], tmp.reg_i(i), write);
1153          emit_instruction(ir);
1154       }
1155    }
1156    make_last(ir);
1157    return true;
1158 }
1159 
/* Emit an integer division or modulo as a sequence of ALU instructions,
 * handling one written destination component at a time.
 *
 * instr      - NIR ALU instruction; component i of source 0 and source 1 are
 *              the two operands, only components enabled in dest.write_mask
 *              are processed
 * use_signed - when set, both operands are first replaced by a cndge_int
 *              selection between the operand and its negation, and the
 *              result is passed through a final cndge_int selection between
 *              the value and its negation
 * mod        - when set the remainder flavour of the sequence is emitted,
 *              otherwise the quotient flavour
 *
 * Three temporary registers are allocated: the components of the first are
 * used as scalars (asrc1, asrc2, rsign, err), the other two (tmp0, tmp1) are
 * used as four-component scratch space.
 *
 * Always returns true.
 *
 * NOTE(review): the sequence looks like the reciprocal based divmod expansion
 * used by the r600 TGSI backend; the "q" (quotient) / "r" (remainder)
 * annotations below follow that reading of source 0 as numerator and source 1
 * as denominator and should be confirmed against the ISA documentation.
 */
bool EmitAluInstruction::emit_alu_div_int(const nir_alu_instr& instr, bool use_signed, bool mod)
{

   int sel_tmp = allocate_temp_register();
   int sel_tmp0 = allocate_temp_register();
   int sel_tmp1 = allocate_temp_register();

   PValue asrc1(new GPRValue(sel_tmp, 0));
   PValue asrc2(new GPRValue(sel_tmp, 1));
   PValue rsign(new GPRValue(sel_tmp, 2));
   PValue err(new GPRValue(sel_tmp, 3));

   GPRVector tmp0(sel_tmp0, {0,1,2,3});
   GPRVector tmp1(sel_tmp1, {0,1,2,3});

   std::array<PValue, 4> src0;
   std::array<PValue, 4> src1;

   /* Only the written components are picked up; the other entries stay
    * default constructed and are never read below. */
   for (int i = 0; i < 4 ; ++i) {
      if (instr.dest.write_mask & (1 << i)) {
         src0[i] = m_src[0][i];
         src1[i] = m_src[1][i];
      }
   }


   /* Components are processed from w down to x.
    * NOTE(review): the reason for the reverse order is not visible here. */
   for (int i = 3; i >= 0 ; --i) {
      if (!(instr.dest.write_mask & (1 << i)))
         continue;
      if (use_signed) {
         /* asrc1 = -src0, asrc2 = -src1, rsign = src0 ^ src1 */
         emit_instruction(op2_sub_int, asrc1, {Value::zero, src0[i]}, {alu_write});
         emit_instruction(op2_sub_int, asrc2, {Value::zero, src1[i]}, {alu_write});
         emit_instruction(op2_xor_int, rsign, {src0[i], src1[i]}, {alu_write, alu_last_instr});


         /* Select between each operand and its negation, keyed on the operand
          * (presumably yielding the absolute values). */
         emit_instruction(op3_cndge_int, asrc1, {src0[i], src0[i], asrc1}, {alu_write});
         emit_instruction(op3_cndge_int, asrc2, {src1[i], src1[i], asrc2}, {alu_write, alu_last_instr});
      } else {
         /* Unsigned: use the sources directly; this rebinds asrc1/asrc2 away
          * from the temporary register components. */
         asrc1 = src0[i];
         asrc2 = src1[i];
      }

      /* tmp0.x = recip_uint(asrc2) */
      emit_instruction(op1_recip_uint,  tmp0.x(), {asrc2}, {alu_write, alu_last_instr});

      /* tmp0.z = lo(tmp0.x * asrc2) */
      emit_instruction(op2_mullo_uint,  tmp0.z(), {tmp0.x(), asrc2}, {alu_write, alu_last_instr});

      /* tmp0.w = -tmp0.z, tmp0.y = hi(tmp0.x * asrc2) */
      emit_instruction(op2_sub_int,  tmp0.w(), {Value::zero, tmp0.z()}, {alu_write});
      emit_instruction(op2_mulhi_uint,  tmp0.y(), {tmp0.x(), asrc2 }, {alu_write, alu_last_instr});

      /* tmp0.z = cnde_int(tmp0.y, tmp0.w, tmp0.z) */
      emit_instruction(op3_cnde_int,  tmp0.z(), {tmp0.y(), tmp0.w(), tmp0.z()}, {alu_write, alu_last_instr});

      /* err = hi(tmp0.z * tmp0.x) -- presumably the error of the reciprocal */
      emit_instruction(op2_mulhi_uint,  err, {tmp0.z(), tmp0.x()}, {alu_write, alu_last_instr});

      /* tmp1.x = tmp0.x - err, tmp1.y = tmp0.x + err */
      emit_instruction(op2_sub_int, tmp1.x(), {tmp0.x(), err}, {alu_write});
      emit_instruction(op2_add_int, tmp1.y(), {tmp0.x(), err}, {alu_write, alu_last_instr});

      /* tmp0.x = cnde_int(tmp0.y, tmp1.y, tmp1.x) -- the corrected reciprocal */
      emit_instruction(op3_cnde_int,  tmp0.x(), {tmp0.y(), tmp1.y(), tmp1.x()}, {alu_write, alu_last_instr});

      /* tmp0.z = hi(tmp0.x * asrc1)  (q), tmp0.y = lo(tmp0.z * asrc2)  (q * asrc2) */
      emit_instruction(op2_mulhi_uint,  tmp0.z(), {tmp0.x(), asrc1 }, {alu_write, alu_last_instr});
      emit_instruction(op2_mullo_uint,  tmp0.y(), {tmp0.z(), asrc2 }, {alu_write, alu_last_instr});

      /* tmp0.w = asrc1 - tmp0.y  (r) */
      emit_instruction(op2_sub_int,  tmp0.w(), {asrc1, tmp0.y()}, {alu_write, alu_last_instr});


      /* tmp1.x = (tmp0.w >= asrc2), tmp1.y = (asrc1 >= tmp0.y), both unsigned.
       * These two share one instruction group with the two instructions of
       * the mod/div branch below (the group is closed there). */
      emit_instruction(op2_setge_uint,  tmp1.x(), {tmp0.w(), asrc2}, {alu_write});
      emit_instruction(op2_setge_uint,  tmp1.y(), {asrc1, tmp0.y()}, {alu_write});

      if (mod) {
         /* Candidates r - asrc2 and r + asrc2 */
         emit_instruction(op2_sub_int,  tmp1.z(), {tmp0.w(), asrc2}, {alu_write});
         emit_instruction(op2_add_int,  tmp1.w(), {tmp0.w(), asrc2}, {alu_write, alu_last_instr});
      } else {
         /* Candidates q + 1 and q - 1 */
         emit_instruction(op2_add_int,  tmp1.z(), {tmp0.z(), Value::one_i}, {alu_write});
         emit_instruction(op2_sub_int,  tmp1.w(), {tmp0.z(), Value::one_i}, {alu_write, alu_last_instr});
      }

      /* tmp1.x = tmp1.x & tmp1.y */
      emit_instruction(op2_and_int,  tmp1.x(), {tmp1.x(), tmp1.y()}, {alu_write, alu_last_instr});

      /* First correction: choose between the uncorrected value (r for mod,
       * q for div) and the tmp1.z candidate, keyed on tmp1.x. */
      if (mod)
         emit_instruction(op3_cnde_int,  tmp0.z(), {tmp1.x(), tmp0.w(), tmp1.z()}, {alu_write, alu_last_instr});
      else
         emit_instruction(op3_cnde_int,  tmp0.z(), {tmp1.x(), tmp0.z(), tmp1.z()}, {alu_write, alu_last_instr});

      if (use_signed) {
         /* Second correction (keyed on tmp1.y, choosing the tmp1.w candidate),
          * then tmp0.y = -tmp0.z. */
         emit_instruction(op3_cnde_int,  tmp0.z(), {tmp1.y(), tmp1.w(), tmp0.z()}, {alu_write, alu_last_instr});
         emit_instruction(op2_sub_int,  tmp0.y(), {Value::zero, tmp0.z()}, {alu_write, alu_last_instr});

         /* Final selection between the value and its negation: keyed on
          * source 0 for mod and on rsign (src0 ^ src1) for div. */
         if (mod)
            emit_instruction(op3_cndge_int,  from_nir(instr.dest, i), {src0[i], tmp0.z(), tmp0.y()},
                          {alu_write, alu_last_instr});
         else
            emit_instruction(op3_cndge_int,  from_nir(instr.dest, i), {rsign, tmp0.z(), tmp0.y()},
                          {alu_write, alu_last_instr});
      } else {
         /* Unsigned: the second correction writes the destination directly. */
         emit_instruction(op3_cnde_int,  from_nir(instr.dest, i), {tmp1.y(), tmp1.w(), tmp0.z()}, {alu_write, alu_last_instr});
      }
   }
   return true;
}
1258 
split_alu_modifiers(const nir_alu_src & src,const GPRVector::Values & v,GPRVector::Values & out,int ncomp)1259 void EmitAluInstruction::split_alu_modifiers(const nir_alu_src& src,
1260                                              const GPRVector::Values& v, GPRVector::Values& out, int ncomp)
1261 {
1262 
1263    AluInstruction *alu = nullptr;
1264    for (int i = 0; i < ncomp; ++i) {
1265       alu  = new AluInstruction(op1_mov,  out[i], v[i], {alu_write});
1266       if (src.abs)
1267          alu->set_flag(alu_src0_abs);
1268       if (src.negate)
1269          alu->set_flag(alu_src0_neg);
1270       emit_instruction(alu);
1271    }
1272    make_last(alu);
1273 }
1274 
emit_tex_fdd(const nir_alu_instr & instr,TexInstruction::Opcode op,bool fine)1275 bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op,
1276                                       bool fine)
1277 {
1278 
1279    GPRVector::Values v;
1280    std::array<int, 4> writemask = {0,1,2,3};
1281 
1282    int ncomp = nir_src_num_components(instr.src[0].src);
1283 
1284    GPRVector::Swizzle src_swz;
1285    for (auto i = 0; i < 4; ++i) {
1286       src_swz[i] = instr.src[0].swizzle[i];
1287    }
1288 
1289    auto src = vec_from_nir_with_fetch_constant(instr.src[0].src, (1 << ncomp) - 1, src_swz);
1290 
1291    if (instr.src[0].abs || instr.src[0].negate) {
1292       GPRVector tmp = get_temp_vec4();
1293       split_alu_modifiers(instr.src[0], src.values(), tmp.values(), ncomp);
1294       src = tmp;
1295    }
1296 
1297    for (int i = 0; i < 4; ++i) {
1298       writemask[i] = (instr.dest.write_mask & (1 << i)) ? i : 7;
1299       v[i] = from_nir(instr.dest, (i < ncomp) ? i : 0);
1300    }
1301 
1302    /* This is querying the dreivatives of the output fb, so we would either need
1303     * access to the neighboring pixels or to the framebuffer. Neither is currently
1304     * implemented */
1305    GPRVector dst(v);
1306 
1307    auto tex = new TexInstruction(op, dst, src, 0, R600_MAX_CONST_BUFFERS, PValue());
1308    tex->set_dest_swizzle(writemask);
1309 
1310    if (fine) {
1311       std::cerr << "Sewt fine flag\n";
1312       tex->set_flag(TexInstruction::grad_fine);
1313    }
1314 
1315    emit_instruction(tex);
1316 
1317    return true;
1318 }
1319 
emit_bitfield_extract(const nir_alu_instr & instr,EAluOp opcode)1320 bool EmitAluInstruction::emit_bitfield_extract(const nir_alu_instr& instr, EAluOp opcode)
1321 {
1322    int itmp = allocate_temp_register();
1323    std::array<PValue, 4> tmp;
1324    std::array<PValue, 4> dst;
1325    std::array<PValue, 4> src0;
1326    std::array<PValue, 4> shift;
1327 
1328    PValue l32(new LiteralValue(32));
1329    unsigned write_mask = instr.dest.write_mask;
1330 
1331    AluInstruction *ir = nullptr;
1332    for (int i = 0; i < 4; i++) {
1333       if (!(write_mask & (1<<i)))
1334 			continue;
1335       dst[i] = from_nir(instr.dest, i);
1336       src0[i] = m_src[0][i];
1337       shift[i] = m_src[2][i];
1338 
1339       ir = new AluInstruction(opcode, dst[i],
1340                               {src0[i], m_src[1][i], shift[i]},
1341                               {alu_write});
1342       emit_instruction(ir);
1343    }
1344    make_last(ir);
1345 
1346    for (int i = 0; i < 4; i++) {
1347       if (!(write_mask & (1<<i)))
1348 			continue;
1349       tmp[i] = PValue(new GPRValue(itmp, i));
1350       ir = new AluInstruction(op2_setge_int, tmp[i], {shift[i], l32},
1351       {alu_write});
1352       emit_instruction(ir);
1353    }
1354    make_last(ir);
1355 
1356    for (int i = 0; i < 4; i++) {
1357       if (!(write_mask & (1<<i)))
1358 			continue;
1359       ir = new AluInstruction(op3_cnde_int, dst[i], {tmp[i], dst[i], src0[i]},
1360                               {alu_write});
1361       emit_instruction(ir);
1362    }
1363    make_last(ir);
1364 
1365    return true;
1366 }
1367 
emit_bitfield_insert(const nir_alu_instr & instr)1368 bool EmitAluInstruction::emit_bitfield_insert(const nir_alu_instr& instr)
1369 {
1370    auto t0 = get_temp_vec4();
1371    auto t1 = get_temp_vec4();
1372    auto t2 = get_temp_vec4();
1373    auto t3 = get_temp_vec4();
1374 
1375    PValue l32(new LiteralValue(32));
1376    unsigned write_mask = instr.dest.write_mask;
1377    if (!write_mask) return true;
1378 
1379    AluInstruction *ir = nullptr;
1380    for (int i = 0; i < 4; i++) {
1381       if (!(write_mask & (1<<i)))
1382 			continue;
1383 
1384       ir = new AluInstruction(op2_setge_int, t0[i], {m_src[3][i], l32}, {alu_write});
1385       emit_instruction(ir);
1386    }
1387    make_last(ir);
1388 
1389    for (int i = 0; i < 4; i++) {
1390       if (!(write_mask & (1<<i)))
1391 			continue;
1392       ir = new AluInstruction(op2_bfm_int, t1[i], {m_src[3][i], m_src[2][i]}, {alu_write});
1393       emit_instruction(ir);
1394    }
1395    make_last(ir);
1396 
1397    for (int i = 0; i < 4; i++) {
1398       if (!(write_mask & (1<<i)))
1399 			continue;
1400       ir = new AluInstruction(op2_lshl_int, t2[i], {m_src[1][i], m_src[2][i]}, {alu_write});
1401       emit_instruction(ir);
1402    }
1403    make_last(ir);
1404 
1405 
1406    for (int i = 0; i < 4; i++) {
1407       if (!(write_mask & (1<<i)))
1408 			continue;
1409       ir = new AluInstruction(op3_bfi_int, t3[i],
1410                   {t1[i], t2[i], m_src[0][i]}, {alu_write});
1411       emit_instruction(ir);
1412    }
1413    make_last(ir);
1414 
1415    for (int i = 0; i < 4; i++) {
1416       if (!(write_mask & (1<<i)))
1417 			continue;
1418       ir = new AluInstruction(op3_cnde_int, from_nir(instr.dest, i),
1419                              {t0[i], t3[i], m_src[1][i]}, {alu_write});
1420       emit_instruction(ir);
1421    }
1422    make_last(ir);
1423 
1424    return true;
1425 }
1426 
emit_unpack_32_2x16_split_y(const nir_alu_instr & instr)1427 bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr& instr)
1428 {
1429    auto tmp = get_temp_register();
1430    emit_instruction(op2_lshr_int, tmp,
1431    {m_src[0][0], PValue(new LiteralValue(16))},
1432    {alu_write, alu_last_instr});
1433 
1434    emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1435                                   {tmp}, {alu_write, alu_last_instr});
1436 
1437    return true;
1438 }
1439 
emit_unpack_32_2x16_split_x(const nir_alu_instr & instr)1440 bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr& instr)
1441 {
1442    emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1443    {m_src[0][0]},{alu_write, alu_last_instr});
1444    return true;
1445 }
1446 
emit_pack_32_2x16_split(const nir_alu_instr & instr)1447 bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr& instr)
1448 {
1449    PValue x = get_temp_register();
1450    PValue y = get_temp_register();
1451 
1452    emit_instruction(op1_flt32_to_flt16, x,{m_src[0][0]},{alu_write});
1453    emit_instruction(op1_flt32_to_flt16, y,{m_src[1][0]},{alu_write, alu_last_instr});
1454 
1455    emit_instruction(op2_lshl_int, y, {y, PValue(new LiteralValue(16))},{alu_write, alu_last_instr});
1456 
1457    emit_instruction(op2_or_int, {from_nir(instr.dest, 0)} , {x, y},{alu_write, alu_last_instr});
1458 
1459    return true;
1460 }
1461 
1462 }
1463