1 /* -*- c++ -*- */
2 /*
3  * Copyright © 2011-2015 Intel Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 #ifndef BRW_IR_VEC4_H
26 #define BRW_IR_VEC4_H
27 
28 #include "brw_shader.h"
29 
30 namespace brw {
31 
32 class dst_reg;
33 
34 class src_reg : public backend_reg
35 {
36 public:
37    DECLARE_RALLOC_CXX_OPERATORS(src_reg)
38 
39    void init();
40 
41    src_reg(enum brw_reg_file file, int nr, const glsl_type *type);
42    src_reg();
43    src_reg(struct ::brw_reg reg);
44 
45    bool equals(const src_reg &r) const;
46 
47    src_reg(class vec4_visitor *v, const struct glsl_type *type);
48    src_reg(class vec4_visitor *v, const struct glsl_type *type, int size);
49 
50    explicit src_reg(const dst_reg &reg);
51 
52    src_reg *reladdr;
53 };
54 
55 static inline src_reg
retype(src_reg reg,enum brw_reg_type type)56 retype(src_reg reg, enum brw_reg_type type)
57 {
58    reg.type = type;
59    return reg;
60 }
61 
62 namespace detail {
63 
64 static inline void
add_byte_offset(backend_reg * reg,unsigned bytes)65 add_byte_offset(backend_reg *reg, unsigned bytes)
66 {
67    switch (reg->file) {
68       case BAD_FILE:
69          break;
70       case VGRF:
71       case ATTR:
72       case UNIFORM:
73          reg->offset += bytes;
74          assert(reg->offset % 16 == 0);
75          break;
76       case MRF: {
77          const unsigned suboffset = reg->offset + bytes;
78          reg->nr += suboffset / REG_SIZE;
79          reg->offset = suboffset % REG_SIZE;
80          assert(reg->offset % 16 == 0);
81          break;
82       }
83       case ARF:
84       case FIXED_GRF: {
85          const unsigned suboffset = reg->subnr + bytes;
86          reg->nr += suboffset / REG_SIZE;
87          reg->subnr = suboffset % REG_SIZE;
88          assert(reg->subnr % 16 == 0);
89          break;
90       }
91       default:
92          assert(bytes == 0);
93    }
94 }
95 
} /* namespace detail */
97 
98 static inline src_reg
byte_offset(src_reg reg,unsigned bytes)99 byte_offset(src_reg reg, unsigned bytes)
100 {
101    detail::add_byte_offset(&reg, bytes);
102    return reg;
103 }
104 
105 static inline src_reg
offset(src_reg reg,unsigned width,unsigned delta)106 offset(src_reg reg, unsigned width, unsigned delta)
107 {
108    const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
109    const unsigned num_components = MAX2(width / 4 * stride, 4);
110    return byte_offset(reg, num_components * type_sz(reg.type) * delta);
111 }
112 
113 static inline src_reg
horiz_offset(src_reg reg,unsigned delta)114 horiz_offset(src_reg reg, unsigned delta)
115 {
116    return byte_offset(reg, delta * type_sz(reg.type));
117 }
118 
119 /**
120  * Reswizzle a given source register.
121  * \sa brw_swizzle().
122  */
123 static inline src_reg
swizzle(src_reg reg,unsigned swizzle)124 swizzle(src_reg reg, unsigned swizzle)
125 {
126    if (reg.file == IMM)
127       reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swizzle);
128    else
129       reg.swizzle = brw_compose_swizzle(swizzle, reg.swizzle);
130 
131    return reg;
132 }
133 
134 static inline src_reg
negate(src_reg reg)135 negate(src_reg reg)
136 {
137    assert(reg.file != IMM);
138    reg.negate = !reg.negate;
139    return reg;
140 }
141 
142 static inline bool
is_uniform(const src_reg & reg)143 is_uniform(const src_reg &reg)
144 {
145    return (reg.file == IMM || reg.file == UNIFORM || reg.is_null()) &&
146           (!reg.reladdr || is_uniform(*reg.reladdr));
147 }
148 
149 class dst_reg : public backend_reg
150 {
151 public:
152    DECLARE_RALLOC_CXX_OPERATORS(dst_reg)
153 
154    void init();
155 
156    dst_reg();
157    dst_reg(enum brw_reg_file file, int nr);
158    dst_reg(enum brw_reg_file file, int nr, const glsl_type *type,
159            unsigned writemask);
160    dst_reg(enum brw_reg_file file, int nr, brw_reg_type type,
161            unsigned writemask);
162    dst_reg(struct ::brw_reg reg);
163    dst_reg(class vec4_visitor *v, const struct glsl_type *type);
164 
165    explicit dst_reg(const src_reg &reg);
166 
167    bool equals(const dst_reg &r) const;
168 
169    src_reg *reladdr;
170 };
171 
172 static inline dst_reg
retype(dst_reg reg,enum brw_reg_type type)173 retype(dst_reg reg, enum brw_reg_type type)
174 {
175    reg.type = type;
176    return reg;
177 }
178 
179 static inline dst_reg
byte_offset(dst_reg reg,unsigned bytes)180 byte_offset(dst_reg reg, unsigned bytes)
181 {
182    detail::add_byte_offset(&reg, bytes);
183    return reg;
184 }
185 
186 static inline dst_reg
offset(dst_reg reg,unsigned width,unsigned delta)187 offset(dst_reg reg, unsigned width, unsigned delta)
188 {
189    const unsigned stride = (reg.file == UNIFORM ? 0 : 4);
190    const unsigned num_components = MAX2(width / 4 * stride, 4);
191    return byte_offset(reg, num_components * type_sz(reg.type) * delta);
192 }
193 
194 static inline dst_reg
horiz_offset(const dst_reg & reg,unsigned delta)195 horiz_offset(const dst_reg &reg, unsigned delta)
196 {
197    if (is_uniform(src_reg(reg)))
198       return reg;
199    else
200       return byte_offset(reg, delta * type_sz(reg.type));
201 }
202 
203 static inline dst_reg
writemask(dst_reg reg,unsigned mask)204 writemask(dst_reg reg, unsigned mask)
205 {
206    assert(reg.file != IMM);
207    assert((reg.writemask & mask) != 0);
208    reg.writemask &= mask;
209    return reg;
210 }
211 
212 /**
213  * Return an integer identifying the discrete address space a register is
214  * contained in.  A register is by definition fully contained in the single
215  * reg_space it belongs to, so two registers with different reg_space ids are
216  * guaranteed not to overlap.  Most register files are a single reg_space of
217  * its own, only the VGRF file is composed of multiple discrete address
218  * spaces, one for each VGRF allocation.
219  */
220 static inline uint32_t
reg_space(const backend_reg & r)221 reg_space(const backend_reg &r)
222 {
223    return r.file << 16 | (r.file == VGRF ? r.nr : 0);
224 }
225 
226 /**
227  * Return the base offset in bytes of a register relative to the start of its
228  * reg_space().
229  */
230 static inline unsigned
reg_offset(const backend_reg & r)231 reg_offset(const backend_reg &r)
232 {
233    return (r.file == VGRF || r.file == IMM ? 0 : r.nr) *
234           (r.file == UNIFORM ? 16 : REG_SIZE) + r.offset +
235           (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
236 }
237 
238 /**
239  * Return whether the register region starting at \p r and spanning \p dr
240  * bytes could potentially overlap the register region starting at \p s and
241  * spanning \p ds bytes.
242  */
243 static inline bool
regions_overlap(const backend_reg & r,unsigned dr,const backend_reg & s,unsigned ds)244 regions_overlap(const backend_reg &r, unsigned dr,
245                 const backend_reg &s, unsigned ds)
246 {
247    if (r.file == MRF && (r.nr & BRW_MRF_COMPR4)) {
248       /* COMPR4 regions are translated by the hardware during decompression
249        * into two separate half-regions 4 MRFs apart from each other.
250        */
251       backend_reg t0 = r;
252       t0.nr &= ~BRW_MRF_COMPR4;
253       backend_reg t1 = t0;
254       t1.offset += 4 * REG_SIZE;
255       return regions_overlap(t0, dr / 2, s, ds) ||
256              regions_overlap(t1, dr / 2, s, ds);
257 
258    } else if (s.file == MRF && (s.nr & BRW_MRF_COMPR4)) {
259       return regions_overlap(s, ds, r, dr);
260 
261    } else {
262       return reg_space(r) == reg_space(s) &&
263              !(reg_offset(r) + dr <= reg_offset(s) ||
264                reg_offset(s) + ds <= reg_offset(r));
265    }
266 }
267 
268 class vec4_instruction : public backend_instruction {
269 public:
270    DECLARE_RALLOC_CXX_OPERATORS(vec4_instruction)
271 
272    vec4_instruction(enum opcode opcode,
273                     const dst_reg &dst = dst_reg(),
274                     const src_reg &src0 = src_reg(),
275                     const src_reg &src1 = src_reg(),
276                     const src_reg &src2 = src_reg());
277 
278    dst_reg dst;
279    src_reg src[3];
280 
281    enum brw_urb_write_flags urb_write_flags;
282 
283    unsigned sol_binding; /**< gen6: SOL binding table index */
284    bool sol_final_write; /**< gen6: send commit message */
285    unsigned sol_vertex; /**< gen6: used for setting dst index in SVB header */
286 
287    bool is_send_from_grf();
288    unsigned size_read(unsigned arg) const;
289    bool can_reswizzle(const struct gen_device_info *devinfo, int dst_writemask,
290                       int swizzle, int swizzle_mask);
291    void reswizzle(int dst_writemask, int swizzle);
292    bool can_do_source_mods(const struct gen_device_info *devinfo);
293    bool can_do_writemask(const struct gen_device_info *devinfo);
294    bool can_change_types() const;
295    bool has_source_and_destination_hazard() const;
296 
is_align1_partial_write()297    bool is_align1_partial_write()
298    {
299       return opcode == VEC4_OPCODE_SET_LOW_32BIT ||
300              opcode == VEC4_OPCODE_SET_HIGH_32BIT;
301    }
302 
reads_flag()303    bool reads_flag()
304    {
305       return predicate || opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2;
306    }
307 
reads_flag(unsigned c)308    bool reads_flag(unsigned c)
309    {
310       if (opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2)
311          return true;
312 
313       switch (predicate) {
314       case BRW_PREDICATE_NONE:
315          return false;
316       case BRW_PREDICATE_ALIGN16_REPLICATE_X:
317          return c == 0;
318       case BRW_PREDICATE_ALIGN16_REPLICATE_Y:
319          return c == 1;
320       case BRW_PREDICATE_ALIGN16_REPLICATE_Z:
321          return c == 2;
322       case BRW_PREDICATE_ALIGN16_REPLICATE_W:
323          return c == 3;
324       default:
325          return true;
326       }
327    }
328 
writes_flag()329    bool writes_flag()
330    {
331       return (conditional_mod && (opcode != BRW_OPCODE_SEL &&
332                                   opcode != BRW_OPCODE_IF &&
333                                   opcode != BRW_OPCODE_WHILE));
334    }
335 
reads_g0_implicitly()336    bool reads_g0_implicitly() const
337    {
338       switch (opcode) {
339       case SHADER_OPCODE_TEX:
340       case SHADER_OPCODE_TXL:
341       case SHADER_OPCODE_TXD:
342       case SHADER_OPCODE_TXF:
343       case SHADER_OPCODE_TXF_CMS_W:
344       case SHADER_OPCODE_TXF_CMS:
345       case SHADER_OPCODE_TXF_MCS:
346       case SHADER_OPCODE_TXS:
347       case SHADER_OPCODE_TG4:
348       case SHADER_OPCODE_TG4_OFFSET:
349       case SHADER_OPCODE_SAMPLEINFO:
350       case VS_OPCODE_PULL_CONSTANT_LOAD:
351       case GS_OPCODE_SET_PRIMITIVE_ID:
352       case GS_OPCODE_GET_INSTANCE_ID:
353       case SHADER_OPCODE_GEN4_SCRATCH_READ:
354       case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
355          return true;
356       default:
357          return false;
358       }
359    }
360 };
361 
362 /**
363  * Make the execution of \p inst dependent on the evaluation of a possibly
364  * inverted predicate.
365  */
366 inline vec4_instruction *
set_predicate_inv(enum brw_predicate pred,bool inverse,vec4_instruction * inst)367 set_predicate_inv(enum brw_predicate pred, bool inverse,
368                   vec4_instruction *inst)
369 {
370    inst->predicate = pred;
371    inst->predicate_inverse = inverse;
372    return inst;
373 }
374 
375 /**
376  * Make the execution of \p inst dependent on the evaluation of a predicate.
377  */
378 inline vec4_instruction *
set_predicate(enum brw_predicate pred,vec4_instruction * inst)379 set_predicate(enum brw_predicate pred, vec4_instruction *inst)
380 {
381    return set_predicate_inv(pred, false, inst);
382 }
383 
384 /**
385  * Write the result of evaluating the condition given by \p mod to a flag
386  * register.
387  */
388 inline vec4_instruction *
set_condmod(enum brw_conditional_mod mod,vec4_instruction * inst)389 set_condmod(enum brw_conditional_mod mod, vec4_instruction *inst)
390 {
391    inst->conditional_mod = mod;
392    return inst;
393 }
394 
395 /**
396  * Clamp the result of \p inst to the saturation range of its destination
397  * datatype.
398  */
399 inline vec4_instruction *
set_saturate(bool saturate,vec4_instruction * inst)400 set_saturate(bool saturate, vec4_instruction *inst)
401 {
402    inst->saturate = saturate;
403    return inst;
404 }
405 
406 /**
407  * Return the number of dataflow registers written by the instruction (either
408  * fully or partially) counted from 'floor(reg_offset(inst->dst) /
409  * register_size)'.  The somewhat arbitrary register size unit is 16B for the
410  * UNIFORM and IMM files and 32B for all other files.
411  */
412 inline unsigned
regs_written(const vec4_instruction * inst)413 regs_written(const vec4_instruction *inst)
414 {
415    assert(inst->dst.file != UNIFORM && inst->dst.file != IMM);
416    return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE + inst->size_written,
417                        REG_SIZE);
418 }
419 
420 /**
421  * Return the number of dataflow registers read by the instruction (either
422  * fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
423  * register_size)'.  The somewhat arbitrary register size unit is 16B for the
424  * UNIFORM and IMM files and 32B for all other files.
425  */
426 inline unsigned
regs_read(const vec4_instruction * inst,unsigned i)427 regs_read(const vec4_instruction *inst, unsigned i)
428 {
429    const unsigned reg_size =
430       inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 16 : REG_SIZE;
431    return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size + inst->size_read(i),
432                        reg_size);
433 }
434 
435 static inline enum brw_reg_type
get_exec_type(const vec4_instruction * inst)436 get_exec_type(const vec4_instruction *inst)
437 {
438    enum brw_reg_type exec_type = BRW_REGISTER_TYPE_B;
439 
440    for (int i = 0; i < 3; i++) {
441       if (inst->src[i].file != BAD_FILE) {
442          const brw_reg_type t = get_exec_type(brw_reg_type(inst->src[i].type));
443          if (type_sz(t) > type_sz(exec_type))
444             exec_type = t;
445          else if (type_sz(t) == type_sz(exec_type) &&
446                   brw_reg_type_is_floating_point(t))
447             exec_type = t;
448       }
449    }
450 
451    if (exec_type == BRW_REGISTER_TYPE_B)
452       exec_type = inst->dst.type;
453 
454    /* TODO: We need to handle half-float conversions. */
455    assert(exec_type != BRW_REGISTER_TYPE_HF ||
456           inst->dst.type == BRW_REGISTER_TYPE_HF);
457    assert(exec_type != BRW_REGISTER_TYPE_B);
458 
459    return exec_type;
460 }
461 
462 static inline unsigned
get_exec_type_size(const vec4_instruction * inst)463 get_exec_type_size(const vec4_instruction *inst)
464 {
465    return type_sz(get_exec_type(inst));
466 }
467 
468 } /* namespace brw */
469 
470 #endif
471