/* -*- c++ -*- */
/*
 * Copyright © 2010-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 
25 #ifndef BRW_IR_FS_H
26 #define BRW_IR_FS_H
27 
28 #include "brw_shader.h"
29 
30 class fs_inst;
31 
32 class fs_reg : public backend_reg {
33 public:
34    DECLARE_RALLOC_CXX_OPERATORS(fs_reg)
35 
36    void init();
37 
38    fs_reg();
39    fs_reg(struct ::brw_reg reg);
40    fs_reg(enum brw_reg_file file, int nr);
41    fs_reg(enum brw_reg_file file, int nr, enum brw_reg_type type);
42 
43    bool equals(const fs_reg &r) const;
44    bool is_contiguous() const;
45 
46    /**
47     * Return the size in bytes of a single logical component of the
48     * register assuming the given execution width.
49     */
50    unsigned component_size(unsigned width) const;
51 
52    /** Register region horizontal stride */
53    uint8_t stride;
54 };
55 
56 static inline fs_reg
negate(fs_reg reg)57 negate(fs_reg reg)
58 {
59    assert(reg.file != IMM);
60    reg.negate = !reg.negate;
61    return reg;
62 }
63 
64 static inline fs_reg
retype(fs_reg reg,enum brw_reg_type type)65 retype(fs_reg reg, enum brw_reg_type type)
66 {
67    reg.type = type;
68    return reg;
69 }
70 
71 static inline fs_reg
byte_offset(fs_reg reg,unsigned delta)72 byte_offset(fs_reg reg, unsigned delta)
73 {
74    switch (reg.file) {
75    case BAD_FILE:
76       break;
77    case VGRF:
78    case ATTR:
79    case UNIFORM:
80       reg.offset += delta;
81       break;
82    case MRF: {
83       const unsigned suboffset = reg.offset + delta;
84       reg.nr += suboffset / REG_SIZE;
85       reg.offset = suboffset % REG_SIZE;
86       break;
87    }
88    case ARF:
89    case FIXED_GRF: {
90       const unsigned suboffset = reg.subnr + delta;
91       reg.nr += suboffset / REG_SIZE;
92       reg.subnr = suboffset % REG_SIZE;
93       break;
94    }
95    case IMM:
96    default:
97       assert(delta == 0);
98    }
99    return reg;
100 }
101 
102 static inline fs_reg
horiz_offset(const fs_reg & reg,unsigned delta)103 horiz_offset(const fs_reg &reg, unsigned delta)
104 {
105    switch (reg.file) {
106    case BAD_FILE:
107    case UNIFORM:
108    case IMM:
109       /* These only have a single component that is implicitly splatted.  A
110        * horizontal offset should be a harmless no-op.
111        * XXX - Handle vector immediates correctly.
112        */
113       return reg;
114    case VGRF:
115    case MRF:
116    case ATTR:
117       return byte_offset(reg, delta * reg.stride * type_sz(reg.type));
118    case ARF:
119    case FIXED_GRF:
120       if (reg.is_null()) {
121          return reg;
122       } else {
123          const unsigned stride = reg.hstride ? 1 << (reg.hstride - 1) : 0;
124          return byte_offset(reg, delta * stride * type_sz(reg.type));
125       }
126    }
127    unreachable("Invalid register file");
128 }
129 
130 static inline fs_reg
offset(fs_reg reg,unsigned width,unsigned delta)131 offset(fs_reg reg, unsigned width, unsigned delta)
132 {
133    switch (reg.file) {
134    case BAD_FILE:
135       break;
136    case ARF:
137    case FIXED_GRF:
138    case MRF:
139    case VGRF:
140    case ATTR:
141    case UNIFORM:
142       return byte_offset(reg, delta * reg.component_size(width));
143    case IMM:
144       assert(delta == 0);
145    }
146    return reg;
147 }
148 
149 /**
150  * Get the scalar channel of \p reg given by \p idx and replicate it to all
151  * channels of the result.
152  */
153 static inline fs_reg
component(fs_reg reg,unsigned idx)154 component(fs_reg reg, unsigned idx)
155 {
156    reg = horiz_offset(reg, idx);
157    reg.stride = 0;
158    return reg;
159 }
160 
161 /**
162  * Return an integer identifying the discrete address space a register is
163  * contained in.  A register is by definition fully contained in the single
164  * reg_space it belongs to, so two registers with different reg_space ids are
165  * guaranteed not to overlap.  Most register files are a single reg_space of
166  * its own, only the VGRF file is composed of multiple discrete address
167  * spaces, one for each VGRF allocation.
168  */
169 static inline uint32_t
reg_space(const fs_reg & r)170 reg_space(const fs_reg &r)
171 {
172    return r.file << 16 | (r.file == VGRF ? r.nr : 0);
173 }
174 
175 /**
176  * Return the base offset in bytes of a register relative to the start of its
177  * reg_space().
178  */
179 static inline unsigned
reg_offset(const fs_reg & r)180 reg_offset(const fs_reg &r)
181 {
182    return (r.file == VGRF || r.file == IMM ? 0 : r.nr) *
183           (r.file == UNIFORM ? 4 : REG_SIZE) + r.offset +
184           (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
185 }
186 
187 /**
188  * Return the amount of padding in bytes left unused between individual
189  * components of register \p r due to a (horizontal) stride value greater than
190  * one, or zero if components are tightly packed in the register file.
191  */
192 static inline unsigned
reg_padding(const fs_reg & r)193 reg_padding(const fs_reg &r)
194 {
195    const unsigned stride = ((r.file != ARF && r.file != FIXED_GRF) ? r.stride :
196                             r.hstride == 0 ? 0 :
197                             1 << (r.hstride - 1));
198    return (MAX2(1, stride) - 1) * type_sz(r.type);
199 }
200 
201 /**
202  * Return whether the register region starting at \p r and spanning \p dr
203  * bytes could potentially overlap the register region starting at \p s and
204  * spanning \p ds bytes.
205  */
206 static inline bool
regions_overlap(const fs_reg & r,unsigned dr,const fs_reg & s,unsigned ds)207 regions_overlap(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
208 {
209    if (r.file == MRF && (r.nr & BRW_MRF_COMPR4)) {
210       fs_reg t = r;
211       t.nr &= ~BRW_MRF_COMPR4;
212       /* COMPR4 regions are translated by the hardware during decompression
213        * into two separate half-regions 4 MRFs apart from each other.
214        */
215       return regions_overlap(t, dr / 2, s, ds) ||
216              regions_overlap(byte_offset(t, 4 * REG_SIZE), dr / 2, s, ds);
217 
218    } else if (s.file == MRF && (s.nr & BRW_MRF_COMPR4)) {
219       return regions_overlap(s, ds, r, dr);
220 
221    } else {
222       return reg_space(r) == reg_space(s) &&
223              !(reg_offset(r) + dr <= reg_offset(s) ||
224                reg_offset(s) + ds <= reg_offset(r));
225    }
226 }
227 
228 /**
229  * Check that the register region given by r [r.offset, r.offset + dr[
230  * is fully contained inside the register region given by s
231  * [s.offset, s.offset + ds[.
232  */
233 static inline bool
region_contained_in(const fs_reg & r,unsigned dr,const fs_reg & s,unsigned ds)234 region_contained_in(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
235 {
236    return reg_space(r) == reg_space(s) &&
237           reg_offset(r) >= reg_offset(s) &&
238           reg_offset(r) + dr <= reg_offset(s) + ds;
239 }
240 
241 /**
242  * Return whether the given register region is n-periodic, i.e. whether the
243  * original region remains invariant after shifting it by \p n scalar
244  * channels.
245  */
246 static inline bool
is_periodic(const fs_reg & reg,unsigned n)247 is_periodic(const fs_reg &reg, unsigned n)
248 {
249    if (reg.file == BAD_FILE || reg.is_null()) {
250       return true;
251 
252    } else if (reg.file == IMM) {
253       const unsigned period = (reg.type == BRW_REGISTER_TYPE_UV ||
254                                reg.type == BRW_REGISTER_TYPE_V ? 8 :
255                                reg.type == BRW_REGISTER_TYPE_VF ? 4 :
256                                1);
257       return n % period == 0;
258 
259    } else if (reg.file == ARF || reg.file == FIXED_GRF) {
260       const unsigned period = (reg.hstride == 0 && reg.vstride == 0 ? 1 :
261                                reg.vstride == 0 ? 1 << reg.width :
262                                ~0);
263       return n % period == 0;
264 
265    } else {
266       return reg.stride == 0;
267    }
268 }
269 
270 static inline bool
is_uniform(const fs_reg & reg)271 is_uniform(const fs_reg &reg)
272 {
273    return is_periodic(reg, 1);
274 }
275 
276 /**
277  * Get the specified 8-component quarter of a register.
278  * XXX - Maybe come up with a less misleading name for this (e.g. quarter())?
279  */
280 static inline fs_reg
half(const fs_reg & reg,unsigned idx)281 half(const fs_reg &reg, unsigned idx)
282 {
283    assert(idx < 2);
284    return horiz_offset(reg, 8 * idx);
285 }
286 
287 /**
288  * Reinterpret each channel of register \p reg as a vector of values of the
289  * given smaller type and take the i-th subcomponent from each.
290  */
291 static inline fs_reg
subscript(fs_reg reg,brw_reg_type type,unsigned i)292 subscript(fs_reg reg, brw_reg_type type, unsigned i)
293 {
294    assert((i + 1) * type_sz(type) <= type_sz(reg.type));
295 
296    if (reg.file == ARF || reg.file == FIXED_GRF) {
297       /* The stride is encoded inconsistently for fixed GRF and ARF registers
298        * as the log2 of the actual vertical and horizontal strides.
299        */
300       const int delta = _mesa_logbase2(type_sz(reg.type)) -
301                         _mesa_logbase2(type_sz(type));
302       reg.hstride += (reg.hstride ? delta : 0);
303       reg.vstride += (reg.vstride ? delta : 0);
304 
305    } else if (reg.file == IMM) {
306       assert(reg.type == type);
307 
308    } else {
309       reg.stride *= type_sz(reg.type) / type_sz(type);
310    }
311 
312    return byte_offset(retype(reg, type), i * type_sz(type));
313 }
314 
/**
 * Constant register built with the fs_reg default constructor.  Because it
 * is declared static in this header, every translation unit that includes
 * the header gets its own copy.
 * NOTE(review): presumably denotes an undefined/unused register -- confirm
 * against what fs_reg::fs_reg() initializes.
 */
static const fs_reg reg_undef;
316 
317 class fs_inst : public backend_instruction {
318    fs_inst &operator=(const fs_inst &);
319 
320    void init(enum opcode opcode, uint8_t exec_width, const fs_reg &dst,
321              const fs_reg *src, unsigned sources);
322 
323 public:
324    DECLARE_RALLOC_CXX_OPERATORS(fs_inst)
325 
326    fs_inst();
327    fs_inst(enum opcode opcode, uint8_t exec_size);
328    fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst);
329    fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
330            const fs_reg &src0);
331    fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
332            const fs_reg &src0, const fs_reg &src1);
333    fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
334            const fs_reg &src0, const fs_reg &src1, const fs_reg &src2);
335    fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
336            const fs_reg src[], unsigned sources);
337    fs_inst(const fs_inst &that);
338    ~fs_inst();
339 
340    void resize_sources(uint8_t num_sources);
341 
342    bool equals(fs_inst *inst) const;
343    bool is_send_from_grf() const;
344    bool is_partial_write() const;
345    bool is_copy_payload(const brw::simple_allocator &grf_alloc) const;
346    unsigned components_read(unsigned i) const;
347    unsigned size_read(int arg) const;
348    bool can_do_source_mods(const struct gen_device_info *devinfo);
349    bool can_change_types() const;
350    bool has_source_and_destination_hazard() const;
351 
352    /**
353     * Return the subset of flag registers read by the instruction as a bitset
354     * with byte granularity.
355     */
356    unsigned flags_read(const gen_device_info *devinfo) const;
357 
358    /**
359     * Return the subset of flag registers updated by the instruction (either
360     * partially or fully) as a bitset with byte granularity.
361     */
362    unsigned flags_written() const;
363 
364    fs_reg dst;
365    fs_reg *src;
366 
367    uint8_t sources; /**< Number of fs_reg sources. */
368 
369    bool pi_noperspective:1;   /**< Pixel interpolator noperspective flag */
370 };
371 
372 /**
373  * Make the execution of \p inst dependent on the evaluation of a possibly
374  * inverted predicate.
375  */
376 static inline fs_inst *
set_predicate_inv(enum brw_predicate pred,bool inverse,fs_inst * inst)377 set_predicate_inv(enum brw_predicate pred, bool inverse,
378                   fs_inst *inst)
379 {
380    inst->predicate = pred;
381    inst->predicate_inverse = inverse;
382    return inst;
383 }
384 
385 /**
386  * Make the execution of \p inst dependent on the evaluation of a predicate.
387  */
388 static inline fs_inst *
set_predicate(enum brw_predicate pred,fs_inst * inst)389 set_predicate(enum brw_predicate pred, fs_inst *inst)
390 {
391    return set_predicate_inv(pred, false, inst);
392 }
393 
394 /**
395  * Write the result of evaluating the condition given by \p mod to a flag
396  * register.
397  */
398 static inline fs_inst *
set_condmod(enum brw_conditional_mod mod,fs_inst * inst)399 set_condmod(enum brw_conditional_mod mod, fs_inst *inst)
400 {
401    inst->conditional_mod = mod;
402    return inst;
403 }
404 
405 /**
406  * Clamp the result of \p inst to the saturation range of its destination
407  * datatype.
408  */
409 static inline fs_inst *
set_saturate(bool saturate,fs_inst * inst)410 set_saturate(bool saturate, fs_inst *inst)
411 {
412    inst->saturate = saturate;
413    return inst;
414 }
415 
416 /**
417  * Return the number of dataflow registers written by the instruction (either
418  * fully or partially) counted from 'floor(reg_offset(inst->dst) /
419  * register_size)'.  The somewhat arbitrary register size unit is 4B for the
420  * UNIFORM and IMM files and 32B for all other files.
421  */
422 inline unsigned
regs_written(const fs_inst * inst)423 regs_written(const fs_inst *inst)
424 {
425    assert(inst->dst.file != UNIFORM && inst->dst.file != IMM);
426    return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE +
427                        inst->size_written -
428                        MIN2(inst->size_written, reg_padding(inst->dst)),
429                        REG_SIZE);
430 }
431 
432 /**
433  * Return the number of dataflow registers read by the instruction (either
434  * fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
435  * register_size)'.  The somewhat arbitrary register size unit is 4B for the
436  * UNIFORM and IMM files and 32B for all other files.
437  */
438 inline unsigned
regs_read(const fs_inst * inst,unsigned i)439 regs_read(const fs_inst *inst, unsigned i)
440 {
441    const unsigned reg_size =
442       inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 4 : REG_SIZE;
443    return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size +
444                        inst->size_read(i) -
445                        MIN2(inst->size_read(i), reg_padding(inst->src[i])),
446                        reg_size);
447 }
448 
449 static inline enum brw_reg_type
get_exec_type(const fs_inst * inst)450 get_exec_type(const fs_inst *inst)
451 {
452    brw_reg_type exec_type = BRW_REGISTER_TYPE_B;
453 
454    for (int i = 0; i < inst->sources; i++) {
455       if (inst->src[i].file != BAD_FILE) {
456          const brw_reg_type t = get_exec_type(inst->src[i].type);
457          if (type_sz(t) > type_sz(exec_type))
458             exec_type = t;
459          else if (type_sz(t) == type_sz(exec_type) &&
460                   brw_reg_type_is_floating_point(t))
461             exec_type = t;
462       }
463    }
464 
465    if (exec_type == BRW_REGISTER_TYPE_B)
466       exec_type = inst->dst.type;
467 
468    assert(exec_type != BRW_REGISTER_TYPE_B);
469 
470    return exec_type;
471 }
472 
473 static inline unsigned
get_exec_type_size(const fs_inst * inst)474 get_exec_type_size(const fs_inst *inst)
475 {
476    return type_sz(get_exec_type(inst));
477 }
478 
479 #endif
480