/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef GEN_MI_BUILDER_H
#define GEN_MI_BUILDER_H

#include "genxml/genX_bits.h"
#include "util/bitscan.h"
#include "util/fast_idiv_by_const.h"
#include "util/u_math.h"

#ifndef GEN_MI_BUILDER_NUM_ALLOC_GPRS
/** The number of GPRs the MI builder is allowed to allocate
 *
 * This may be set by a user of this API so that it can reserve some GPRs at
 * the top end for its own use.
 */
#define GEN_MI_BUILDER_NUM_ALLOC_GPRS 16
#endif
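/* For example, a user that wants to keep the top two hardware GPRs for its
 * own use could define the following before including this header (the value
 * 14 is purely illustrative):
 *
 *    #define GEN_MI_BUILDER_NUM_ALLOC_GPRS 14
 */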

/** These must be defined by the user of the builder
 *
 * void *__gen_get_batch_dwords(__gen_user_data *user_data,
 *                              unsigned num_dwords);
 *
 * __gen_address_type
 * __gen_address_offset(__gen_address_type addr, uint64_t offset);
 *
 *
 * If self-modifying batches are supported, we must be able to pass batch
 * addresses around as void*s, so pinning, batch chaining, or some other
 * mechanism for ensuring batch pointers remain valid during building is
 * required.  The following function must also be defined; it returns an
 * address in canonical form:
 *
 * uint64_t
 * __gen_get_batch_address(__gen_user_data *user_data, void *location);
 *
 * Also, __gen_combine_address must accept a location value of NULL and return
 * a fully valid 64-bit address.
 */
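/* A minimal sketch of the glue a user of this builder might provide before
 * including this header (the fixed-size batch and the concrete types are
 * illustrative assumptions, not requirements of the API):
 *
 *    struct example_batch {
 *       uint32_t dwords[1024];
 *       uint32_t len;
 *    };
 *
 *    #define __gen_user_data struct example_batch
 *    #define __gen_address_type uint64_t
 *
 *    static inline void *
 *    __gen_get_batch_dwords(__gen_user_data *batch, unsigned num_dwords)
 *    {
 *       void *ptr = &batch->dwords[batch->len];
 *       batch->len += num_dwords;
 *       return ptr;
 *    }
 *
 *    static inline __gen_address_type
 *    __gen_address_offset(__gen_address_type addr, uint64_t offset)
 *    {
 *       return addr + offset;
 *    }
 */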

/*
 * Start of the actual MI builder
 */

#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack

#define gen_mi_builder_pack(b, cmd, dst, name)                          \
   for (struct cmd name = { __genxml_cmd_header(cmd) },                 \
        *_dst = (struct cmd *)(dst); __builtin_expect(_dst != NULL, 1); \
        __genxml_cmd_pack(cmd)((b)->user_data, (void *)_dst, &name),    \
        _dst = NULL)

#define gen_mi_builder_emit(b, cmd, name)                               \
   gen_mi_builder_pack((b), cmd, __gen_get_batch_dwords((b)->user_data, __genxml_cmd_length(cmd)), name)
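
/* For example (a sketch), emitting a single MI_NOOP with the helper above:
 *
 *    gen_mi_builder_emit(b, GENX(MI_NOOP), noop);
 *
 * allocates GENX(MI_NOOP_length) dwords from the batch and packs an MI_NOOP
 * into them; command fields can be set in an optional braced body through the
 * "noop" variable, as the emit calls later in this file do.
 */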


enum gen_mi_value_type {
   GEN_MI_VALUE_TYPE_IMM,
   GEN_MI_VALUE_TYPE_MEM32,
   GEN_MI_VALUE_TYPE_MEM64,
   GEN_MI_VALUE_TYPE_REG32,
   GEN_MI_VALUE_TYPE_REG64,
};

struct gen_mi_value {
   enum gen_mi_value_type type;

   union {
      uint64_t imm;
      __gen_address_type addr;
      uint32_t reg;
   };

#if GEN_GEN >= 7 || GEN_IS_HASWELL
   bool invert;
#endif
};

struct gen_mi_reg_num {
   uint32_t num;
#if GEN_GEN >= 11
   bool cs;
#endif
};
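/* On Gen11+, registers in the 0x2000-0x3fff range belong to the command
 * streamer and are programmed relative to the per-engine CS MMIO base, so
 * gen_mi_adjust_reg_num() below splits a register offset accordingly.  For
 * example, GPR0 at 0x2600 becomes { .num = 0x600, .cs = true } on Gen11+ and
 * simply { .num = 0x2600 } on earlier gens.
 */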

static inline struct gen_mi_reg_num
gen_mi_adjust_reg_num(uint32_t reg)
{
#if GEN_GEN >= 11
   bool cs = reg >= 0x2000 && reg < 0x4000;
   return (struct gen_mi_reg_num) {
      .num = reg - (cs ? 0x2000 : 0),
      .cs = cs,
   };
#else
   return (struct gen_mi_reg_num) { .num = reg, };
#endif
}

#if GEN_GEN >= 9
#define GEN_MI_BUILDER_MAX_MATH_DWORDS 256
#else
#define GEN_MI_BUILDER_MAX_MATH_DWORDS 64
#endif

struct gen_mi_builder {
   __gen_user_data *user_data;

#if GEN_GEN >= 8 || GEN_IS_HASWELL
   uint32_t gprs;
   uint8_t gpr_refs[GEN_MI_BUILDER_NUM_ALLOC_GPRS];

   unsigned num_math_dwords;
   uint32_t math_dwords[GEN_MI_BUILDER_MAX_MATH_DWORDS];
#endif
};

static inline void
gen_mi_builder_init(struct gen_mi_builder *b, __gen_user_data *user_data)
{
   memset(b, 0, sizeof(*b));
   b->user_data = user_data;

#if GEN_GEN >= 8 || GEN_IS_HASWELL
   b->gprs = 0;
   b->num_math_dwords = 0;
#endif
}

static inline void
gen_mi_builder_flush_math(struct gen_mi_builder *b)
{
#if GEN_GEN >= 8 || GEN_IS_HASWELL
   if (b->num_math_dwords == 0)
      return;

   uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                                                     1 + b->num_math_dwords);
   gen_mi_builder_pack(b, GENX(MI_MATH), dw, math) {
      math.DWordLength = 1 + b->num_math_dwords - GENX(MI_MATH_length_bias);
   }
   memcpy(dw + 1, b->math_dwords, b->num_math_dwords * sizeof(uint32_t));
   b->num_math_dwords = 0;
#endif
}

#define _GEN_MI_BUILDER_GPR_BASE 0x2600
/* The actual hardware limit on GPRs */
#define _GEN_MI_BUILDER_NUM_HW_GPRS 16

#if GEN_GEN >= 8 || GEN_IS_HASWELL

static inline bool
gen_mi_value_is_gpr(struct gen_mi_value val)
{
   return (val.type == GEN_MI_VALUE_TYPE_REG32 ||
           val.type == GEN_MI_VALUE_TYPE_REG64) &&
          val.reg >= _GEN_MI_BUILDER_GPR_BASE &&
          val.reg < _GEN_MI_BUILDER_GPR_BASE +
                    _GEN_MI_BUILDER_NUM_HW_GPRS * 8;
}

static inline bool
_gen_mi_value_is_allocated_gpr(struct gen_mi_value val)
{
   return (val.type == GEN_MI_VALUE_TYPE_REG32 ||
           val.type == GEN_MI_VALUE_TYPE_REG64) &&
          val.reg >= _GEN_MI_BUILDER_GPR_BASE &&
          val.reg < _GEN_MI_BUILDER_GPR_BASE +
                    GEN_MI_BUILDER_NUM_ALLOC_GPRS * 8;
}

static inline uint32_t
_gen_mi_value_as_gpr(struct gen_mi_value val)
{
   assert(gen_mi_value_is_gpr(val));
   assert(val.reg % 8 == 0);
   return (val.reg - _GEN_MI_BUILDER_GPR_BASE) / 8;
}

static inline struct gen_mi_value
gen_mi_new_gpr(struct gen_mi_builder *b)
{
   unsigned gpr = ffs(~b->gprs) - 1;
   assert(gpr < GEN_MI_BUILDER_NUM_ALLOC_GPRS);
   assert(b->gpr_refs[gpr] == 0);
   b->gprs |= (1u << gpr);
   b->gpr_refs[gpr] = 1;

   return (struct gen_mi_value) {
      .type = GEN_MI_VALUE_TYPE_REG64,
      .reg = _GEN_MI_BUILDER_GPR_BASE + gpr * 8,
   };
}
#endif /* GEN_GEN >= 8 || GEN_IS_HASWELL */

/** Take a reference to a gen_mi_value
 *
 * The MI builder uses reference counting to automatically free ALU GPRs for
 * re-use in calculations.  All gen_mi_* math functions consume the reference
 * they are handed for each source and return a reference to a value which the
 * caller must consume.  In particular, if you pass the same value into a
 * single gen_mi_* math function twice (say to add a number to itself), you
 * are responsible for calling gen_mi_value_ref() to get a second reference
 * because the gen_mi_* math function will consume it twice.
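 *
 * For example, to add a computed value to itself (a sketch; "b" is an
 * initialized builder and "a", "c" are __gen_address_type values):
 *
 *    struct gen_mi_value x =
 *       gen_mi_iadd(b, gen_mi_mem32(a), gen_mi_mem32(c));
 *    struct gen_mi_value two_x =
 *       gen_mi_iadd(b, gen_mi_value_ref(b, x), x);
 *
 * Here x lives in an allocated GPR, so the extra gen_mi_value_ref() is needed
 * because the second gen_mi_iadd() consumes two references to it.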
 */
static inline struct gen_mi_value
gen_mi_value_ref(struct gen_mi_builder *b, struct gen_mi_value val)
{
#if GEN_GEN >= 8 || GEN_IS_HASWELL
   if (_gen_mi_value_is_allocated_gpr(val)) {
      unsigned gpr = _gen_mi_value_as_gpr(val);
      assert(gpr < GEN_MI_BUILDER_NUM_ALLOC_GPRS);
      assert(b->gprs & (1u << gpr));
      assert(b->gpr_refs[gpr] < UINT8_MAX);
      b->gpr_refs[gpr]++;
   }
#endif /* GEN_GEN >= 8 || GEN_IS_HASWELL */

   return val;
}

/** Drop a reference to a gen_mi_value
 *
 * See also gen_mi_value_ref.
 */
static inline void
gen_mi_value_unref(struct gen_mi_builder *b, struct gen_mi_value val)
{
#if GEN_GEN >= 8 || GEN_IS_HASWELL
   if (_gen_mi_value_is_allocated_gpr(val)) {
      unsigned gpr = _gen_mi_value_as_gpr(val);
      assert(gpr < GEN_MI_BUILDER_NUM_ALLOC_GPRS);
      assert(b->gprs & (1u << gpr));
      assert(b->gpr_refs[gpr] > 0);
      if (--b->gpr_refs[gpr] == 0)
         b->gprs &= ~(1u << gpr);
   }
#endif /* GEN_GEN >= 8 || GEN_IS_HASWELL */
}

static inline struct gen_mi_value
gen_mi_imm(uint64_t imm)
{
   return (struct gen_mi_value) {
      .type = GEN_MI_VALUE_TYPE_IMM,
      .imm = imm,
   };
}

static inline struct gen_mi_value
gen_mi_reg32(uint32_t reg)
{
   struct gen_mi_value val = {
      .type = GEN_MI_VALUE_TYPE_REG32,
      .reg = reg,
   };
#if GEN_GEN >= 8 || GEN_IS_HASWELL
   assert(!_gen_mi_value_is_allocated_gpr(val));
#endif
   return val;
}

static inline struct gen_mi_value
gen_mi_reg64(uint32_t reg)
{
   struct gen_mi_value val = {
      .type = GEN_MI_VALUE_TYPE_REG64,
      .reg = reg,
   };
#if GEN_GEN >= 8 || GEN_IS_HASWELL
   assert(!_gen_mi_value_is_allocated_gpr(val));
#endif
   return val;
}

static inline struct gen_mi_value
gen_mi_mem32(__gen_address_type addr)
{
   return (struct gen_mi_value) {
      .type = GEN_MI_VALUE_TYPE_MEM32,
      .addr = addr,
   };
}

static inline struct gen_mi_value
gen_mi_mem64(__gen_address_type addr)
{
   return (struct gen_mi_value) {
      .type = GEN_MI_VALUE_TYPE_MEM64,
      .addr = addr,
   };
}

static inline struct gen_mi_value
gen_mi_value_half(struct gen_mi_value value, bool top_32_bits)
{
   switch (value.type) {
   case GEN_MI_VALUE_TYPE_IMM:
      if (top_32_bits)
         value.imm >>= 32;
      else
         value.imm &= 0xffffffffu;
      return value;

   case GEN_MI_VALUE_TYPE_MEM32:
      assert(!top_32_bits);
      return value;

   case GEN_MI_VALUE_TYPE_MEM64:
      if (top_32_bits)
         value.addr = __gen_address_offset(value.addr, 4);
      value.type = GEN_MI_VALUE_TYPE_MEM32;
      return value;

   case GEN_MI_VALUE_TYPE_REG32:
      assert(!top_32_bits);
      return value;

   case GEN_MI_VALUE_TYPE_REG64:
      if (top_32_bits)
         value.reg += 4;
      value.type = GEN_MI_VALUE_TYPE_REG32;
      return value;
   }

   unreachable("Invalid gen_mi_value type");
}

static inline void
_gen_mi_copy_no_unref(struct gen_mi_builder *b,
                      struct gen_mi_value dst, struct gen_mi_value src)
{
#if GEN_GEN >= 7 || GEN_IS_HASWELL
   /* TODO: We could handle src.invert by emitting a bit of math if we really
    * wanted to.
    */
   assert(!dst.invert && !src.invert);
#endif
   gen_mi_builder_flush_math(b);

   switch (dst.type) {
   case GEN_MI_VALUE_TYPE_IMM:
      unreachable("Cannot copy to an immediate");

   case GEN_MI_VALUE_TYPE_MEM64:
   case GEN_MI_VALUE_TYPE_REG64:
      /* If the destination is 64 bits, we have to copy in two halves */
      _gen_mi_copy_no_unref(b, gen_mi_value_half(dst, false),
                               gen_mi_value_half(src, false));
      switch (src.type) {
      case GEN_MI_VALUE_TYPE_IMM:
      case GEN_MI_VALUE_TYPE_MEM64:
      case GEN_MI_VALUE_TYPE_REG64:
         /* TODO: Use MI_STORE_DATA_IMM::StoreQWord when we have it */
         _gen_mi_copy_no_unref(b, gen_mi_value_half(dst, true),
                                  gen_mi_value_half(src, true));
         break;
      default:
         _gen_mi_copy_no_unref(b, gen_mi_value_half(dst, true),
                                  gen_mi_imm(0));
         break;
      }
      break;

   case GEN_MI_VALUE_TYPE_MEM32:
      switch (src.type) {
      case GEN_MI_VALUE_TYPE_IMM:
         gen_mi_builder_emit(b, GENX(MI_STORE_DATA_IMM), sdi) {
            sdi.Address = dst.addr;
#if GEN_GEN >= 12
            sdi.ForceWriteCompletionCheck = true;
#endif
            sdi.ImmediateData = src.imm;
         }
         break;

      case GEN_MI_VALUE_TYPE_MEM32:
      case GEN_MI_VALUE_TYPE_MEM64:
#if GEN_GEN >= 8
         gen_mi_builder_emit(b, GENX(MI_COPY_MEM_MEM), cmm) {
            cmm.DestinationMemoryAddress = dst.addr;
            cmm.SourceMemoryAddress = src.addr;
         }
#elif GEN_IS_HASWELL
         {
            struct gen_mi_value tmp = gen_mi_new_gpr(b);
            _gen_mi_copy_no_unref(b, tmp, src);
            _gen_mi_copy_no_unref(b, dst, tmp);
            gen_mi_value_unref(b, tmp);
         }
#else
         unreachable("Cannot do mem <-> mem copy on IVB and earlier");
#endif
         break;

      case GEN_MI_VALUE_TYPE_REG32:
      case GEN_MI_VALUE_TYPE_REG64:
         gen_mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
            struct gen_mi_reg_num reg = gen_mi_adjust_reg_num(src.reg);
            srm.RegisterAddress = reg.num;
#if GEN_GEN >= 11
            srm.AddCSMMIOStartOffset = reg.cs;
#endif
            srm.MemoryAddress = dst.addr;
         }
         break;

      default:
         unreachable("Invalid gen_mi_value type");
      }
      break;

   case GEN_MI_VALUE_TYPE_REG32:
      switch (src.type) {
      case GEN_MI_VALUE_TYPE_IMM:
         gen_mi_builder_emit(b, GENX(MI_LOAD_REGISTER_IMM), lri) {
            struct gen_mi_reg_num reg = gen_mi_adjust_reg_num(dst.reg);
            lri.RegisterOffset = reg.num;
#if GEN_GEN >= 11
            lri.AddCSMMIOStartOffset = reg.cs;
#endif
            lri.DataDWord = src.imm;
         }
         break;

      case GEN_MI_VALUE_TYPE_MEM32:
      case GEN_MI_VALUE_TYPE_MEM64:
         gen_mi_builder_emit(b, GENX(MI_LOAD_REGISTER_MEM), lrm) {
            struct gen_mi_reg_num reg = gen_mi_adjust_reg_num(dst.reg);
            lrm.RegisterAddress = reg.num;
#if GEN_GEN >= 11
            lrm.AddCSMMIOStartOffset = reg.cs;
#endif
            lrm.MemoryAddress = src.addr;
         }
         break;

      case GEN_MI_VALUE_TYPE_REG32:
      case GEN_MI_VALUE_TYPE_REG64:
#if GEN_GEN >= 8 || GEN_IS_HASWELL
         if (src.reg != dst.reg) {
            gen_mi_builder_emit(b, GENX(MI_LOAD_REGISTER_REG), lrr) {
               struct gen_mi_reg_num reg = gen_mi_adjust_reg_num(src.reg);
               lrr.SourceRegisterAddress = reg.num;
#if GEN_GEN >= 11
               lrr.AddCSMMIOStartOffsetSource = reg.cs;
#endif
               reg = gen_mi_adjust_reg_num(dst.reg);
               lrr.DestinationRegisterAddress = reg.num;
#if GEN_GEN >= 11
               lrr.AddCSMMIOStartOffsetDestination = reg.cs;
#endif
            }
         }
#else
         unreachable("Cannot do reg <-> reg copy on IVB and earlier");
#endif
         break;

      default:
         unreachable("Invalid gen_mi_value type");
      }
      break;

   default:
      unreachable("Invalid gen_mi_value type");
   }
}

/** Store the value in src to the value represented by dst
 *
 * If the bit size of src and dst mismatch, this function does an unsigned
 * integer cast.  If src has more bits than dst, it takes the bottom bits.  If
 * src has fewer bits than dst, it fills the top bits with zeros.
 *
 * This function consumes one reference for each of src and dst.
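 *
 * For example (a sketch; "b" is an initialized builder and "dst", "src" are
 * __gen_address_type values):
 *
 *    gen_mi_store(b, gen_mi_mem32(dst), gen_mi_mem64(src));
 *
 * stores only the bottom 32 bits of the 64-bit value at src, while
 *
 *    gen_mi_store(b, gen_mi_mem64(dst), gen_mi_mem32(src));
 *
 * writes the 32-bit value to the bottom dword of dst and zeroes the top.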
 */
static inline void
gen_mi_store(struct gen_mi_builder *b,
             struct gen_mi_value dst, struct gen_mi_value src)
{
   _gen_mi_copy_no_unref(b, dst, src);
   gen_mi_value_unref(b, src);
   gen_mi_value_unref(b, dst);
}

static inline void
gen_mi_memset(struct gen_mi_builder *b, __gen_address_type dst,
              uint32_t value, uint32_t size)
{
#if GEN_GEN >= 8 || GEN_IS_HASWELL
   assert(b->num_math_dwords == 0);
#endif

   /* This memset operates in units of dwords. */
   assert(size % 4 == 0);

   for (uint32_t i = 0; i < size; i += 4) {
      gen_mi_store(b, gen_mi_mem32(__gen_address_offset(dst, i)),
                      gen_mi_imm(value));
   }
}

/* NOTE: On IVB, this function stomps GEN7_3DPRIM_BASE_VERTEX */
static inline void
gen_mi_memcpy(struct gen_mi_builder *b, __gen_address_type dst,
              __gen_address_type src, uint32_t size)
{
#if GEN_GEN >= 8 || GEN_IS_HASWELL
   assert(b->num_math_dwords == 0);
#endif

   /* This memcpy operates in units of dwords. */
   assert(size % 4 == 0);

   for (uint32_t i = 0; i < size; i += 4) {
      struct gen_mi_value dst_val = gen_mi_mem32(__gen_address_offset(dst, i));
      struct gen_mi_value src_val = gen_mi_mem32(__gen_address_offset(src, i));
#if GEN_GEN >= 8 || GEN_IS_HASWELL
      gen_mi_store(b, dst_val, src_val);
#else
      /* IVB does not have a general purpose register for command streamer
       * commands. Therefore, we use an alternate temporary register.
       */
      struct gen_mi_value tmp_reg = gen_mi_reg32(0x2440); /* GEN7_3DPRIM_BASE_VERTEX */
      gen_mi_store(b, tmp_reg, src_val);
      gen_mi_store(b, dst_val, tmp_reg);
#endif
   }
}

/*
 * MI_MATH Section.  Only available on Haswell+
 */

#if GEN_GEN >= 8 || GEN_IS_HASWELL

/**
 * Perform a predicated store (assuming the condition is already loaded
 * in the MI_PREDICATE_RESULT register) of the value in src to the memory
 * location specified by dst.  Non-memory destinations are not supported.
 *
 * This function consumes one reference for each of src and dst.
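 *
 * For example (a sketch; MI_PREDICATE_SRC0/SRC1 stand in for the predicate
 * source registers and the field names follow the usual genxml pattern),
 * storing 1 to dst only when the value at "a" is non-zero:
 *
 *    gen_mi_store(b, gen_mi_reg64(MI_PREDICATE_SRC0), gen_mi_mem64(a));
 *    gen_mi_store(b, gen_mi_reg64(MI_PREDICATE_SRC1), gen_mi_imm(0));
 *    gen_mi_builder_emit(b, GENX(MI_PREDICATE), mip) {
 *       mip.LoadOperation    = LOAD_LOADINV;
 *       mip.CombineOperation = COMBINE_SET;
 *       mip.CompareOperation = COMPARE_SRCS_EQUAL;
 *    }
 *    gen_mi_store_if(b, gen_mi_mem32(dst), gen_mi_imm(1));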
 */
static inline void
gen_mi_store_if(struct gen_mi_builder *b,
                struct gen_mi_value dst,
                struct gen_mi_value src)
{
   assert(!dst.invert && !src.invert);

   gen_mi_builder_flush_math(b);

   /* We can only predicate MI_STORE_REGISTER_MEM, so restrict the
    * destination to be memory, and resolve the source to a temporary
    * register if it isn't in one already.
    */
   assert(dst.type == GEN_MI_VALUE_TYPE_MEM64 ||
          dst.type == GEN_MI_VALUE_TYPE_MEM32);

   if (src.type != GEN_MI_VALUE_TYPE_REG32 &&
       src.type != GEN_MI_VALUE_TYPE_REG64) {
      struct gen_mi_value tmp = gen_mi_new_gpr(b);
      _gen_mi_copy_no_unref(b, tmp, src);
      src = tmp;
   }

   if (dst.type == GEN_MI_VALUE_TYPE_MEM64) {
      gen_mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         struct gen_mi_reg_num reg = gen_mi_adjust_reg_num(src.reg);
         srm.RegisterAddress = reg.num;
#if GEN_GEN >= 11
         srm.AddCSMMIOStartOffset = reg.cs;
#endif
         srm.MemoryAddress = dst.addr;
         srm.PredicateEnable = true;
      }
      gen_mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         struct gen_mi_reg_num reg = gen_mi_adjust_reg_num(src.reg + 4);
         srm.RegisterAddress = reg.num;
#if GEN_GEN >= 11
         srm.AddCSMMIOStartOffset = reg.cs;
#endif
         srm.MemoryAddress = __gen_address_offset(dst.addr, 4);
         srm.PredicateEnable = true;
      }
   } else {
      gen_mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         struct gen_mi_reg_num reg = gen_mi_adjust_reg_num(src.reg);
         srm.RegisterAddress = reg.num;
#if GEN_GEN >= 11
         srm.AddCSMMIOStartOffset = reg.cs;
#endif
         srm.MemoryAddress = dst.addr;
         srm.PredicateEnable = true;
      }
   }

   gen_mi_value_unref(b, src);
   gen_mi_value_unref(b, dst);
}

static inline void
_gen_mi_builder_push_math(struct gen_mi_builder *b,
                          const uint32_t *dwords,
                          unsigned num_dwords)
{
   assert(num_dwords < GEN_MI_BUILDER_MAX_MATH_DWORDS);
   if (b->num_math_dwords + num_dwords > GEN_MI_BUILDER_MAX_MATH_DWORDS)
      gen_mi_builder_flush_math(b);

   memcpy(&b->math_dwords[b->num_math_dwords],
          dwords, num_dwords * sizeof(*dwords));
   b->num_math_dwords += num_dwords;
}

static inline uint32_t
_gen_mi_pack_alu(uint32_t opcode, uint32_t operand1, uint32_t operand2)
{
   struct GENX(MI_MATH_ALU_INSTRUCTION) instr = {
      .Operand2 = operand2,
      .Operand1 = operand1,
      .ALUOpcode = opcode,
   };

   uint32_t dw;
   GENX(MI_MATH_ALU_INSTRUCTION_pack)(NULL, &dw, &instr);

   return dw;
}

static inline struct gen_mi_value
gen_mi_value_to_gpr(struct gen_mi_builder *b, struct gen_mi_value val)
{
   if (gen_mi_value_is_gpr(val))
      return val;

   /* Save off the invert flag because it makes copy() grumpy */
   bool invert = val.invert;
   val.invert = false;

   struct gen_mi_value tmp = gen_mi_new_gpr(b);
   _gen_mi_copy_no_unref(b, tmp, val);
   tmp.invert = invert;

   return tmp;
}

static inline uint32_t
_gen_mi_math_load_src(struct gen_mi_builder *b,
                      unsigned src, struct gen_mi_value *val)
{
   if (val->type == GEN_MI_VALUE_TYPE_IMM &&
       (val->imm == 0 || val->imm == UINT64_MAX)) {
      uint64_t imm = val->invert ? ~val->imm : val->imm;
      return _gen_mi_pack_alu(imm ? MI_ALU_LOAD1 : MI_ALU_LOAD0, src, 0);
   } else {
      *val = gen_mi_value_to_gpr(b, *val);
      return _gen_mi_pack_alu(val->invert ? MI_ALU_LOADINV : MI_ALU_LOAD,
                              src, _gen_mi_value_as_gpr(*val));
   }
}

static inline struct gen_mi_value
gen_mi_math_binop(struct gen_mi_builder *b, uint32_t opcode,
                  struct gen_mi_value src0, struct gen_mi_value src1,
                  uint32_t store_op, uint32_t store_src)
{
   struct gen_mi_value dst = gen_mi_new_gpr(b);

   uint32_t dw[4];
   dw[0] = _gen_mi_math_load_src(b, MI_ALU_SRCA, &src0);
   dw[1] = _gen_mi_math_load_src(b, MI_ALU_SRCB, &src1);
   dw[2] = _gen_mi_pack_alu(opcode, 0, 0);
   dw[3] = _gen_mi_pack_alu(store_op, _gen_mi_value_as_gpr(dst), store_src);
   _gen_mi_builder_push_math(b, dw, 4);

   gen_mi_value_unref(b, src0);
   gen_mi_value_unref(b, src1);

   return dst;
}

static inline struct gen_mi_value
gen_mi_inot(struct gen_mi_builder *b, struct gen_mi_value val)
{
   /* TODO These currently can't be passed into gen_mi_copy */
   val.invert = !val.invert;
   return val;
}

static inline struct gen_mi_value
gen_mi_iadd(struct gen_mi_builder *b,
            struct gen_mi_value src0, struct gen_mi_value src1)
{
   return gen_mi_math_binop(b, MI_ALU_ADD, src0, src1,
                            MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct gen_mi_value
gen_mi_iadd_imm(struct gen_mi_builder *b,
                struct gen_mi_value src, uint64_t N)
{
   if (N == 0)
      return src;

   return gen_mi_iadd(b, src, gen_mi_imm(N));
}

static inline struct gen_mi_value
gen_mi_isub(struct gen_mi_builder *b,
            struct gen_mi_value src0, struct gen_mi_value src1)
{
   return gen_mi_math_binop(b, MI_ALU_SUB, src0, src1,
                            MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct gen_mi_value
gen_mi_ult(struct gen_mi_builder *b,
           struct gen_mi_value src0, struct gen_mi_value src1)
{
   /* Compute "less than" by subtracting and storing the carry bit */
   return gen_mi_math_binop(b, MI_ALU_SUB, src0, src1,
                            MI_ALU_STORE, MI_ALU_CF);
}

static inline struct gen_mi_value
gen_mi_uge(struct gen_mi_builder *b,
           struct gen_mi_value src0, struct gen_mi_value src1)
{
   /* Compute "greater than or equal" by subtracting and storing the inverse
    * of the carry bit.
    */
   return gen_mi_math_binop(b, MI_ALU_SUB, src0, src1,
                            MI_ALU_STOREINV, MI_ALU_CF);
}

static inline struct gen_mi_value
gen_mi_iand(struct gen_mi_builder *b,
            struct gen_mi_value src0, struct gen_mi_value src1)
{
   return gen_mi_math_binop(b, MI_ALU_AND, src0, src1,
                            MI_ALU_STORE, MI_ALU_ACCU);
}

/**
 * Returns (src != 0) ? 1 : 0.
 */
static inline struct gen_mi_value
gen_mi_nz(struct gen_mi_builder *b, struct gen_mi_value src)
{
   return gen_mi_math_binop(b, MI_ALU_ADD, src, gen_mi_imm(0),
                            MI_ALU_STOREINV, MI_ALU_ZF);
}

/**
 * Returns (src == 0) ? 1 : 0.
 */
static inline struct gen_mi_value
gen_mi_z(struct gen_mi_builder *b, struct gen_mi_value src)
{
   return gen_mi_math_binop(b, MI_ALU_ADD, src, gen_mi_imm(0),
                            MI_ALU_STORE, MI_ALU_ZF);
}

static inline struct gen_mi_value
gen_mi_ior(struct gen_mi_builder *b,
           struct gen_mi_value src0, struct gen_mi_value src1)
{
   return gen_mi_math_binop(b, MI_ALU_OR, src0, src1,
                            MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct gen_mi_value
gen_mi_imul_imm(struct gen_mi_builder *b,
                struct gen_mi_value src, uint32_t N)
{
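   /* Multiply by a constant using binary shift-and-add: walk the bits of N
    * from the top set bit down, doubling the accumulator for each bit and
    * adding src whenever the bit is set.  For example, N = 5 (0b101) is
    * computed as ((src * 2) * 2) + src.
    */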
   if (N == 0) {
      gen_mi_value_unref(b, src);
      return gen_mi_imm(0);
   }

   if (N == 1)
      return src;

   src = gen_mi_value_to_gpr(b, src);

   struct gen_mi_value res = gen_mi_value_ref(b, src);

   unsigned top_bit = 31 - __builtin_clz(N);
   for (int i = top_bit - 1; i >= 0; i--) {
      res = gen_mi_iadd(b, res, gen_mi_value_ref(b, res));
      if (N & (1 << i))
         res = gen_mi_iadd(b, res, gen_mi_value_ref(b, src));
   }

   gen_mi_value_unref(b, src);

   return res;
}

static inline struct gen_mi_value
gen_mi_ishl_imm(struct gen_mi_builder *b,
                struct gen_mi_value src, uint32_t shift)
{
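   /* MI_MATH as used by this builder has no shift operation, so implement
    * the left shift as "shift" successive doublings.
    */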
   struct gen_mi_value res = gen_mi_value_to_gpr(b, src);

   for (unsigned i = 0; i < shift; i++)
      res = gen_mi_iadd(b, res, gen_mi_value_ref(b, res));

   return res;
}

static inline struct gen_mi_value
gen_mi_ushr32_imm(struct gen_mi_builder *b,
                  struct gen_mi_value src, uint32_t shift)
{
   /* We right-shift by left-shifting by 32 - shift and taking the top 32 bits
    * of the result.  This assumes the top 32 bits are zero.
    */
   if (shift > 64)
      return gen_mi_imm(0);

   if (shift > 32) {
      struct gen_mi_value tmp = gen_mi_new_gpr(b);
      _gen_mi_copy_no_unref(b, gen_mi_value_half(tmp, false),
                               gen_mi_value_half(src, true));
      _gen_mi_copy_no_unref(b, gen_mi_value_half(tmp, true), gen_mi_imm(0));
      gen_mi_value_unref(b, src);
      src = tmp;
      shift -= 32;
   }
   assert(shift <= 32);
   struct gen_mi_value tmp = gen_mi_ishl_imm(b, src, 32 - shift);
   struct gen_mi_value dst = gen_mi_new_gpr(b);
   _gen_mi_copy_no_unref(b, gen_mi_value_half(dst, false),
                            gen_mi_value_half(tmp, true));
   _gen_mi_copy_no_unref(b, gen_mi_value_half(dst, true), gen_mi_imm(0));
   gen_mi_value_unref(b, tmp);
   return dst;
}

static inline struct gen_mi_value
gen_mi_udiv32_imm(struct gen_mi_builder *b,
                  struct gen_mi_value N, uint32_t D)
{
   /* We implicitly assume that N is only a 32-bit value */
   if (D == 0) {
      /* This is invalid but we should do something */
      return gen_mi_imm(0);
   } else if (util_is_power_of_two_or_zero(D)) {
      return gen_mi_ushr32_imm(b, N, util_logbase2(D));
   } else {
      struct util_fast_udiv_info m = util_compute_fast_udiv_info(D, 32, 32);
      assert(m.multiplier <= UINT32_MAX);
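      /* A sketch of the identity implemented below, in terms of the
       * util_fast_udiv_info fields:
       *
       *    N / D = (((N >> pre_shift) * multiplier
       *              + (increment ? multiplier : 0)) >> 32) >> post_shift
       */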

      if (m.pre_shift)
         N = gen_mi_ushr32_imm(b, N, m.pre_shift);

      /* Do the 32x32 multiply into a temporary GPR */
      N = gen_mi_imul_imm(b, N, m.multiplier);

      if (m.increment)
         N = gen_mi_iadd(b, N, gen_mi_imm(m.multiplier));

      N = gen_mi_ushr32_imm(b, N, 32);

      if (m.post_shift)
         N = gen_mi_ushr32_imm(b, N, m.post_shift);

      return N;
   }
}

#endif /* MI_MATH section */

/* This assumes addresses of strictly more than 32 bits (i.e., Gen8+). */
#if GEN_MI_BUILDER_CAN_WRITE_BATCH

struct gen_mi_address_token {
   /* Pointers to address memory fields in the batch. */
   uint64_t *ptrs[2];
};
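/* A sketch of how the token mechanism is meant to be used for self-modifying
 * batches: gen_mi_store_address() emits MI_STORE_REGISTER_MEMs whose memory
 * address fields are filled in later, once the batch location that should
 * receive the register value is known, by _gen_mi_resolve_address_token().
 * Illustrative flow (ADDR_REG is a placeholder register number):
 *
 *    struct gen_mi_address_token token =
 *       gen_mi_store_address(b, gen_mi_reg64(ADDR_REG));
 *    gen_mi_self_mod_barrier(b);
 *
 *    void *loc = <some dword in the batch to be patched at execution time>;
 *    _gen_mi_resolve_address_token(b, token, loc);
 */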

static inline struct gen_mi_address_token
gen_mi_store_address(struct gen_mi_builder *b,
                     struct gen_mi_value addr_reg)
{
   gen_mi_builder_flush_math(b);

   assert(addr_reg.type == GEN_MI_VALUE_TYPE_REG64);

   struct gen_mi_address_token token = {};

   for (unsigned i = 0; i < 2; i++) {
      gen_mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         srm.RegisterAddress = addr_reg.reg + (i * 4);

         const unsigned addr_dw =
            GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8;
         token.ptrs[i] = (void *)_dst + addr_dw;
      }
   }

   gen_mi_value_unref(b, addr_reg);
   return token;
}

static inline void
gen_mi_self_mod_barrier(struct gen_mi_builder *b)
{
   /* First make sure all the memory writes from previous modifying commands
    * have landed. We want to do this before going through the CS cache,
    * otherwise we could be fetching memory that hasn't been written to yet.
    */
   gen_mi_builder_emit(b, GENX(PIPE_CONTROL), pc) {
      pc.CommandStreamerStallEnable = true;
   }
   /* Documentation says Gen11+ should be able to invalidate the command cache
    * but experiments show it doesn't work properly, so for now just get past
    * the CS prefetch.
    */
   for (uint32_t i = 0; i < 128; i++)
      gen_mi_builder_emit(b, GENX(MI_NOOP), noop);
}

static inline void
_gen_mi_resolve_address_token(struct gen_mi_builder *b,
                              struct gen_mi_address_token token,
                              void *batch_location)
{
   uint64_t addr_addr_u64 = __gen_get_batch_address(b->user_data,
                                                    batch_location);
   *(token.ptrs[0]) = addr_addr_u64;
   *(token.ptrs[1]) = addr_addr_u64 + 4;
}

#endif /* GEN_MI_BUILDER_CAN_WRITE_BATCH */

#endif /* GEN_MI_BUILDER_H */