1 /*
2  * Mesa 3-D graphics library
3  *
4  * Copyright (C) 2012-2015 LunarG, Inc.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Chia-I Wu <olv@lunarg.com>
26  */
27 
28 #include "ilo_debug.h"
29 #include "ilo_state_shader.h"
30 
/*
 * Shader stages handled by the shared vertex_*() helpers in this file.  The
 * stage selects per-stage limits during validation and the per-stage
 * URB-read fixups in vertex_get_gen6_ff().
 */
enum vertex_stage {
   STAGE_VS,   /* programmed through 3DSTATE_VS */
   STAGE_HS,   /* programmed through 3DSTATE_HS */
   STAGE_DS,   /* programmed through 3DSTATE_DS */
   STAGE_GS,   /* programmed through 3DSTATE_GS */
};
37 
/*
 * Hardware-ready fixed-function parameters common to the VS/HS/DS/GS state
 * commands.  Filled in by vertex_get_gen6_ff() and then packed into the
 * command dwords by the *_set_*() functions.
 */
struct vertex_ff {
   /* copied from the kernel info; first GRF that receives URB data */
   uint8_t grf_start;

   /* encoded scratch space: 0 means 1KB, each increment doubles the size */
   uint8_t per_thread_scratch_space;
   /* decoded scratch size in bytes (a power of two), or 0 when unused */
   uint32_t per_thread_scratch_size;

   /* sampler count in units of 4 samplers, clamped to 4 */
   uint8_t sampler_count;
   /* copied from the resource info */
   uint8_t surface_count;
   /* copied from the resource info */
   bool has_uav;

   /* URB read offset and length, in pairs of attributes */
   uint8_t vue_read_offset;
   uint8_t vue_read_len;

   /* copied from the URB info; only written to hardware on Gen8 */
   uint8_t user_clip_enables;
};
53 
54 static bool
vertex_validate_gen6_kernel(const struct ilo_dev * dev,enum vertex_stage stage,const struct ilo_state_shader_kernel_info * kernel)55 vertex_validate_gen6_kernel(const struct ilo_dev *dev,
56                             enum vertex_stage stage,
57                             const struct ilo_state_shader_kernel_info *kernel)
58 {
59    /*
60     * "Dispatch GRF Start Register for URB Data" is U4 for GS and U5 for
61     * others.
62     */
63    const uint8_t max_grf_start = (stage == STAGE_GS) ? 16 : 32;
64 
65    ILO_DEV_ASSERT(dev, 6, 8);
66 
67    /* we do not want to save it */
68    assert(!kernel->offset);
69 
70    assert(kernel->grf_start < max_grf_start);
71 
72    return true;
73 }
74 
75 static bool
vertex_validate_gen6_urb(const struct ilo_dev * dev,enum vertex_stage stage,const struct ilo_state_shader_urb_info * urb)76 vertex_validate_gen6_urb(const struct ilo_dev *dev,
77                          enum vertex_stage stage,
78                          const struct ilo_state_shader_urb_info *urb)
79 {
80    /* "Vertex/Patch URB Entry Read Offset" is U6, in pairs */
81    const uint8_t max_read_base = 63 * 2;
82    /*
83     * "Vertex/Patch URB Entry Read Length" is limited to 64 for DS and U6 for
84     * others, in pairs
85     */
86    const uint8_t max_read_count = ((stage == STAGE_DS) ? 64 : 63) * 2;
87 
88    ILO_DEV_ASSERT(dev, 6, 8);
89 
90    assert(urb->read_base + urb->read_count <= urb->cv_input_attr_count);
91 
92    assert(urb->read_base % 2 == 0 && urb->read_base <= max_read_base);
93 
94    /*
95     * There is no need to worry about reading past entries, as URB entries are
96     * aligned to 1024-bits (Gen6) or 512-bits (Gen7+).
97     */
98    assert(urb->read_count <= max_read_count);
99 
100    return true;
101 }
102 
103 static bool
vertex_get_gen6_ff(const struct ilo_dev * dev,enum vertex_stage stage,const struct ilo_state_shader_kernel_info * kernel,const struct ilo_state_shader_resource_info * resource,const struct ilo_state_shader_urb_info * urb,uint32_t per_thread_scratch_size,struct vertex_ff * ff)104 vertex_get_gen6_ff(const struct ilo_dev *dev,
105                    enum vertex_stage stage,
106                    const struct ilo_state_shader_kernel_info *kernel,
107                    const struct ilo_state_shader_resource_info *resource,
108                    const struct ilo_state_shader_urb_info *urb,
109                    uint32_t per_thread_scratch_size,
110                    struct vertex_ff *ff)
111 {
112    ILO_DEV_ASSERT(dev, 6, 8);
113 
114    memset(ff, 0, sizeof(*ff));
115 
116    if (!vertex_validate_gen6_kernel(dev, stage, kernel) ||
117        !vertex_validate_gen6_urb(dev, stage, urb))
118       return false;
119 
120    ff->grf_start = kernel->grf_start;
121 
122    if (per_thread_scratch_size) {
123       /*
124        * From the Sandy Bridge PRM, volume 2 part 1, page 134:
125        *
126        *     "(Per-Thread Scratch Space)
127        *      Range    [0,11] indicating [1K Bytes, 2M Bytes]"
128        */
129       assert(per_thread_scratch_size <= 2 * 1024 * 1024);
130 
131       /* next power of two, starting from 1KB */
132       ff->per_thread_scratch_space = (per_thread_scratch_size > 1024) ?
133          (util_last_bit(per_thread_scratch_size - 1) - 10) : 0;
134       ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space);
135    }
136 
137    ff->sampler_count = (resource->sampler_count <= 12) ?
138       (resource->sampler_count + 3) / 4 : 4;
139    ff->surface_count = resource->surface_count;
140    ff->has_uav = resource->has_uav;
141 
142    ff->vue_read_offset = urb->read_base / 2;
143    ff->vue_read_len = (urb->read_count + 1) / 2;
144 
145    /* need to read something unless VUE handles are included */
146    switch (stage) {
147    case STAGE_VS:
148       if (!ff->vue_read_len)
149          ff->vue_read_len = 1;
150 
151       /* one GRF per attribute */
152       assert(kernel->grf_start + urb->read_count * 2 <= 128);
153       break;
154    case STAGE_GS:
155       if (ilo_dev_gen(dev) == ILO_GEN(6) && !ff->vue_read_len)
156          ff->vue_read_len = 1;
157       break;
158    default:
159       break;
160    }
161 
162    ff->user_clip_enables = urb->user_clip_enables;
163 
164    return true;
165 }
166 
167 static uint16_t
vs_get_gen6_thread_count(const struct ilo_dev * dev,const struct ilo_state_vs_info * info)168 vs_get_gen6_thread_count(const struct ilo_dev *dev,
169                          const struct ilo_state_vs_info *info)
170 {
171    uint16_t thread_count;
172 
173    ILO_DEV_ASSERT(dev, 6, 8);
174 
175    /* Maximum Number of Threads of 3DSTATE_VS */
176    switch (ilo_dev_gen(dev)) {
177    case ILO_GEN(8):
178       thread_count = 504;
179       break;
180    case ILO_GEN(7.5):
181       thread_count = (dev->gt >= 2) ? 280 : 70;
182       break;
183    case ILO_GEN(7):
184    case ILO_GEN(6):
185    default:
186       thread_count = dev->thread_count;
187       break;
188    }
189 
190    return thread_count - 1;
191 }
192 
193 static bool
vs_set_gen6_3DSTATE_VS(struct ilo_state_vs * vs,const struct ilo_dev * dev,const struct ilo_state_vs_info * info)194 vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
195                        const struct ilo_dev *dev,
196                        const struct ilo_state_vs_info *info)
197 {
198    struct vertex_ff ff;
199    uint16_t thread_count;
200    uint32_t dw2, dw3, dw4, dw5;
201 
202    ILO_DEV_ASSERT(dev, 6, 8);
203 
204    if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel, &info->resource,
205             &info->urb, info->per_thread_scratch_size, &ff))
206       return false;
207 
208    thread_count = vs_get_gen6_thread_count(dev, info);
209 
210    dw2 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
211          ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
212 
213    if (false)
214       dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
215 
216    if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
217       dw2 |= GEN75_THREADDISP_ACCESS_UAV;
218 
219    dw3 = ff.per_thread_scratch_space <<
220       GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
221 
222    dw4 = ff.grf_start << GEN6_VS_DW4_URB_GRF_START__SHIFT |
223          ff.vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
224          ff.vue_read_offset << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT;
225 
226    dw5 = 0;
227 
228    if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
229       dw5 |= thread_count << GEN75_VS_DW5_MAX_THREADS__SHIFT;
230    else
231       dw5 |= thread_count << GEN6_VS_DW5_MAX_THREADS__SHIFT;
232 
233    if (info->stats_enable)
234       dw5 |= GEN6_VS_DW5_STATISTICS;
235    if (info->dispatch_enable)
236       dw5 |= GEN6_VS_DW5_VS_ENABLE;
237 
238    STATIC_ASSERT(ARRAY_SIZE(vs->vs) >= 5);
239    vs->vs[0] = dw2;
240    vs->vs[1] = dw3;
241    vs->vs[2] = dw4;
242    vs->vs[3] = dw5;
243 
244    if (ilo_dev_gen(dev) >= ILO_GEN(8))
245       vs->vs[4] = ff.user_clip_enables << GEN8_VS_DW8_UCP_CLIP_ENABLES__SHIFT;
246 
247    vs->scratch_size = ff.per_thread_scratch_size * thread_count;
248 
249    return true;
250 }
251 
252 static uint16_t
hs_get_gen7_thread_count(const struct ilo_dev * dev,const struct ilo_state_hs_info * info)253 hs_get_gen7_thread_count(const struct ilo_dev *dev,
254                          const struct ilo_state_hs_info *info)
255 {
256    uint16_t thread_count;
257 
258    ILO_DEV_ASSERT(dev, 7, 8);
259 
260    /* Maximum Number of Threads of 3DSTATE_HS */
261    switch (ilo_dev_gen(dev)) {
262    case ILO_GEN(8):
263       thread_count = 504;
264       break;
265    case ILO_GEN(7.5):
266       thread_count = (dev->gt >= 2) ? 256 : 70;
267       break;
268    case ILO_GEN(7):
269    default:
270       thread_count = dev->thread_count;
271       break;
272    }
273 
274    return thread_count - 1;
275 }
276 
277 static bool
hs_set_gen7_3DSTATE_HS(struct ilo_state_hs * hs,const struct ilo_dev * dev,const struct ilo_state_hs_info * info)278 hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
279                        const struct ilo_dev *dev,
280                        const struct ilo_state_hs_info *info)
281 {
282    struct vertex_ff ff;
283    uint16_t thread_count;
284    uint32_t dw1, dw2, dw4, dw5;
285 
286    ILO_DEV_ASSERT(dev, 7, 8);
287 
288    if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel, &info->resource,
289             &info->urb, info->per_thread_scratch_size, &ff))
290       return false;
291 
292    thread_count = hs_get_gen7_thread_count(dev, info);
293 
294    dw1 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
295          ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
296 
297    dw2 = 0 << GEN7_HS_DW2_INSTANCE_COUNT__SHIFT;
298 
299    if (ilo_dev_gen(dev) >= ILO_GEN(8))
300       dw2 |= thread_count << GEN8_HS_DW2_MAX_THREADS__SHIFT;
301    else if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
302       dw1 |= thread_count << GEN75_HS_DW1_DISPATCH_MAX_THREADS__SHIFT;
303    else
304       dw1 |= thread_count << GEN7_HS_DW1_DISPATCH_MAX_THREADS__SHIFT;
305 
306    if (info->dispatch_enable)
307       dw2 |= GEN7_HS_DW2_HS_ENABLE;
308    if (info->stats_enable)
309       dw2 |= GEN7_HS_DW2_STATISTICS;
310 
311    dw4 = ff.per_thread_scratch_space <<
312       GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
313 
314    dw5 = GEN7_HS_DW5_INCLUDE_VERTEX_HANDLES |
315          ff.grf_start << GEN7_HS_DW5_URB_GRF_START__SHIFT |
316          ff.vue_read_len << GEN7_HS_DW5_URB_READ_LEN__SHIFT |
317          ff.vue_read_offset << GEN7_HS_DW5_URB_READ_OFFSET__SHIFT;
318 
319    if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
320       dw5 |= GEN75_HS_DW5_ACCESS_UAV;
321 
322    STATIC_ASSERT(ARRAY_SIZE(hs->hs) >= 4);
323    hs->hs[0] = dw1;
324    hs->hs[1] = dw2;
325    hs->hs[2] = dw4;
326    hs->hs[3] = dw5;
327 
328    hs->scratch_size = ff.per_thread_scratch_size * thread_count;
329 
330    return true;
331 }
332 
333 static bool
ds_set_gen7_3DSTATE_TE(struct ilo_state_ds * ds,const struct ilo_dev * dev,const struct ilo_state_ds_info * info)334 ds_set_gen7_3DSTATE_TE(struct ilo_state_ds *ds,
335                        const struct ilo_dev *dev,
336                        const struct ilo_state_ds_info *info)
337 {
338    uint32_t dw1;
339 
340    ILO_DEV_ASSERT(dev, 7, 8);
341 
342    dw1 = 0;
343 
344    if (info->dispatch_enable) {
345       dw1 |= GEN7_TE_DW1_MODE_HW |
346              GEN7_TE_DW1_TE_ENABLE;
347    }
348 
349    STATIC_ASSERT(ARRAY_SIZE(ds->te) >= 3);
350    ds->te[0] = dw1;
351    ds->te[1] = fui(63.0f);
352    ds->te[2] = fui(64.0f);
353 
354    return true;
355 }
356 
357 static uint16_t
ds_get_gen7_thread_count(const struct ilo_dev * dev,const struct ilo_state_ds_info * info)358 ds_get_gen7_thread_count(const struct ilo_dev *dev,
359                          const struct ilo_state_ds_info *info)
360 {
361    uint16_t thread_count;
362 
363    ILO_DEV_ASSERT(dev, 7, 8);
364 
365    /* Maximum Number of Threads of 3DSTATE_DS */
366    switch (ilo_dev_gen(dev)) {
367    case ILO_GEN(8):
368       thread_count = 504;
369       break;
370    case ILO_GEN(7.5):
371       thread_count = (dev->gt >= 2) ? 280 : 70;
372       break;
373    case ILO_GEN(7):
374    default:
375       thread_count = dev->thread_count;
376       break;
377    }
378 
379    return thread_count - 1;
380 }
381 
382 static bool
ds_set_gen7_3DSTATE_DS(struct ilo_state_ds * ds,const struct ilo_dev * dev,const struct ilo_state_ds_info * info)383 ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
384                        const struct ilo_dev *dev,
385                        const struct ilo_state_ds_info *info)
386 {
387    struct vertex_ff ff;
388    uint16_t thread_count;
389    uint32_t dw2, dw3, dw4, dw5;
390 
391    ILO_DEV_ASSERT(dev, 7, 8);
392 
393    if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel, &info->resource,
394             &info->urb, info->per_thread_scratch_size, &ff))
395       return false;
396 
397    thread_count = ds_get_gen7_thread_count(dev, info);
398 
399    dw2 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
400          ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
401 
402    if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
403       dw2 |= GEN75_THREADDISP_ACCESS_UAV;
404 
405    dw3 = ff.per_thread_scratch_space <<
406       GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
407 
408    dw4 = ff.grf_start << GEN7_DS_DW4_URB_GRF_START__SHIFT |
409          ff.vue_read_len << GEN7_DS_DW4_URB_READ_LEN__SHIFT |
410          ff.vue_read_offset << GEN7_DS_DW4_URB_READ_OFFSET__SHIFT;
411 
412    dw5 = 0;
413 
414    if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
415       dw5 |= thread_count << GEN75_DS_DW5_MAX_THREADS__SHIFT;
416    else
417       dw5 |= thread_count << GEN7_DS_DW5_MAX_THREADS__SHIFT;
418 
419    if (info->stats_enable)
420       dw5 |= GEN7_DS_DW5_STATISTICS;
421    if (info->dispatch_enable)
422       dw5 |= GEN7_DS_DW5_DS_ENABLE;
423 
424    STATIC_ASSERT(ARRAY_SIZE(ds->ds) >= 5);
425    ds->ds[0] = dw2;
426    ds->ds[1] = dw3;
427    ds->ds[2] = dw4;
428    ds->ds[3] = dw5;
429 
430    if (ilo_dev_gen(dev) >= ILO_GEN(8))
431       ds->ds[4] = ff.user_clip_enables << GEN8_DS_DW8_UCP_CLIP_ENABLES__SHIFT;
432 
433    ds->scratch_size = ff.per_thread_scratch_size * thread_count;
434 
435    return true;
436 }
437 
438 static bool
gs_get_gen6_ff(const struct ilo_dev * dev,const struct ilo_state_gs_info * info,struct vertex_ff * ff)439 gs_get_gen6_ff(const struct ilo_dev *dev,
440                const struct ilo_state_gs_info *info,
441                struct vertex_ff *ff)
442 {
443    const struct ilo_state_shader_urb_info *urb = &info->urb;
444    const struct ilo_state_gs_sol_info *sol = &info->sol;
445 
446    ILO_DEV_ASSERT(dev, 6, 8);
447 
448    if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel, &info->resource,
449             &info->urb, info->per_thread_scratch_size, ff))
450       return false;
451 
452    /*
453     * From the Ivy Bridge PRM, volume 2 part 1, page 168-169:
454     *
455     *     "[0,62] indicating [1,63] 16B units"
456     *
457     *     "Programming Restrictions: The vertex size must be programmed as a
458     *      multiple of 32B units with the following exception: Rendering is
459     *      disabled (as per SOL stage state) and the vertex size output by the
460     *      GS thread is 16B.
461     *
462     *      If rendering is enabled (as per SOL state) the vertex size must be
463     *      programmed as a multiple of 32B units. In other words, the only
464     *      time software can program a vertex size with an odd number of 16B
465     *      units is when rendering is disabled."
466     */
467    assert(urb->output_attr_count <= 63);
468    if (!sol->render_disable)
469       assert(urb->output_attr_count % 2 == 0);
470 
471    return true;
472 }
473 
474 static uint16_t
gs_get_gen6_thread_count(const struct ilo_dev * dev,const struct ilo_state_gs_info * info)475 gs_get_gen6_thread_count(const struct ilo_dev *dev,
476                          const struct ilo_state_gs_info *info)
477 {
478    const struct ilo_state_gs_sol_info *sol = &info->sol;
479    uint16_t thread_count;
480 
481    ILO_DEV_ASSERT(dev, 6, 8);
482 
483    /* Maximum Number of Threads of 3DSTATE_GS */
484    switch (ilo_dev_gen(dev)) {
485    case ILO_GEN(8):
486       thread_count = 504;
487       break;
488    case ILO_GEN(7.5):
489       thread_count = (dev->gt >= 2) ? 256 : 70;
490       break;
491    case ILO_GEN(7):
492    case ILO_GEN(6):
493    default:
494       thread_count = dev->thread_count;
495 
496       /*
497        * From the Sandy Bridge PRM, volume 2 part 1, page 154:
498        *
499        *     "Maximum Number of Threads valid range is [0,27] when Rendering
500        *      Enabled bit is set."
501        *
502        * According to the classic driver, [0, 20] for GT1.
503        */
504       if (!sol->render_disable)
505          thread_count = (dev->gt == 2) ? 27 : 20;
506       break;
507    }
508 
509    return thread_count - 1;
510 }
511 
512 static bool
gs_set_gen6_3DSTATE_GS(struct ilo_state_gs * gs,const struct ilo_dev * dev,const struct ilo_state_gs_info * info)513 gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs,
514                        const struct ilo_dev *dev,
515                        const struct ilo_state_gs_info *info)
516 {
517    const struct ilo_state_gs_sol_info *sol = &info->sol;
518    struct vertex_ff ff;
519    uint16_t thread_count;
520    uint32_t dw2, dw3, dw4, dw5, dw6;
521 
522    ILO_DEV_ASSERT(dev, 6, 6);
523 
524    if (!gs_get_gen6_ff(dev, info, &ff))
525       return false;
526 
527    thread_count = gs_get_gen6_thread_count(dev, info);
528 
529    dw2 = GEN6_THREADDISP_SPF |
530          ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
531          ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
532 
533    dw3 = ff.per_thread_scratch_space <<
534       GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
535 
536    dw4 = ff.vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
537          ff.vue_read_offset << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
538          ff.grf_start << GEN6_GS_DW4_URB_GRF_START__SHIFT;
539 
540    dw5 = thread_count << GEN6_GS_DW5_MAX_THREADS__SHIFT;
541 
542    if (info->stats_enable)
543       dw5 |= GEN6_GS_DW5_STATISTICS;
544    if (sol->stats_enable)
545       dw5 |= GEN6_GS_DW5_SO_STATISTICS;
546    if (!sol->render_disable)
547       dw5 |= GEN6_GS_DW5_RENDER_ENABLE;
548 
549    dw6 = 0;
550 
551    /* GEN7_REORDER_TRAILING is handled by the kernel */
552    if (sol->tristrip_reorder == GEN7_REORDER_LEADING)
553       dw6 |= GEN6_GS_DW6_REORDER_LEADING_ENABLE;
554 
555    if (sol->sol_enable) {
556       dw6 |= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE;
557 
558       if (sol->svbi_post_inc) {
559          dw6 |= GEN6_GS_DW6_SVBI_POST_INC_ENABLE |
560                 sol->svbi_post_inc << GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT;
561       }
562    }
563 
564    if (info->dispatch_enable)
565       dw6 |= GEN6_GS_DW6_GS_ENABLE;
566 
567    STATIC_ASSERT(ARRAY_SIZE(gs->gs) >= 5);
568    gs->gs[0] = dw2;
569    gs->gs[1] = dw3;
570    gs->gs[2] = dw4;
571    gs->gs[3] = dw5;
572    gs->gs[4] = dw6;
573 
574    gs->scratch_size = ff.per_thread_scratch_size * thread_count;
575 
576    return true;
577 }
578 
579 static uint8_t
gs_get_gen7_vertex_size(const struct ilo_dev * dev,const struct ilo_state_gs_info * info)580 gs_get_gen7_vertex_size(const struct ilo_dev *dev,
581                         const struct ilo_state_gs_info *info)
582 {
583    const struct ilo_state_shader_urb_info *urb = &info->urb;
584 
585    ILO_DEV_ASSERT(dev, 7, 8);
586 
587    return (urb->output_attr_count) ? urb->output_attr_count - 1 : 0;
588 }
589 
590 static bool
gs_set_gen7_3DSTATE_GS(struct ilo_state_gs * gs,const struct ilo_dev * dev,const struct ilo_state_gs_info * info)591 gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs,
592                        const struct ilo_dev *dev,
593                        const struct ilo_state_gs_info *info)
594 {
595    struct vertex_ff ff;
596    uint16_t thread_count;
597    uint8_t vertex_size;
598    uint32_t dw2, dw3, dw4, dw5;
599 
600    ILO_DEV_ASSERT(dev, 7, 8);
601 
602    if (!gs_get_gen6_ff(dev, info, &ff))
603       return false;
604 
605    thread_count = gs_get_gen6_thread_count(dev, info);
606    vertex_size = gs_get_gen7_vertex_size(dev, info);
607 
608    dw2 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
609          ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
610 
611    if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
612       dw2 |= GEN75_THREADDISP_ACCESS_UAV;
613 
614    dw3 = ff.per_thread_scratch_space <<
615       GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
616 
617    dw4 = vertex_size << GEN7_GS_DW4_OUTPUT_SIZE__SHIFT |
618          0 << GEN7_GS_DW4_OUTPUT_TOPO__SHIFT |
619          ff.vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT |
620          GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES |
621          ff.vue_read_offset << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT |
622          ff.grf_start << GEN7_GS_DW4_URB_GRF_START__SHIFT;
623 
624    dw5 = 0;
625 
626    if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
627       dw5 = thread_count << GEN75_GS_DW5_MAX_THREADS__SHIFT;
628    else
629       dw5 = thread_count << GEN7_GS_DW5_MAX_THREADS__SHIFT;
630 
631    if (info->stats_enable)
632       dw5 |= GEN7_GS_DW5_STATISTICS;
633    if (info->dispatch_enable)
634       dw5 |= GEN7_GS_DW5_GS_ENABLE;
635 
636    STATIC_ASSERT(ARRAY_SIZE(gs->gs) >= 5);
637    gs->gs[0] = dw2;
638    gs->gs[1] = dw3;
639    gs->gs[2] = dw4;
640    gs->gs[3] = dw5;
641 
642    if (ilo_dev_gen(dev) >= ILO_GEN(8))
643       gs->gs[4] = ff.user_clip_enables << GEN8_GS_DW9_UCP_CLIP_ENABLES__SHIFT;
644 
645    gs->scratch_size = ff.per_thread_scratch_size * thread_count;
646 
647    return true;
648 }
649 
650 bool
ilo_state_vs_init(struct ilo_state_vs * vs,const struct ilo_dev * dev,const struct ilo_state_vs_info * info)651 ilo_state_vs_init(struct ilo_state_vs *vs,
652                   const struct ilo_dev *dev,
653                   const struct ilo_state_vs_info *info)
654 {
655    bool ret = true;
656 
657    assert(ilo_is_zeroed(vs, sizeof(*vs)));
658 
659    ret &= vs_set_gen6_3DSTATE_VS(vs, dev, info);
660 
661    assert(ret);
662 
663    return ret;
664 }
665 
666 bool
ilo_state_vs_init_disabled(struct ilo_state_vs * vs,const struct ilo_dev * dev)667 ilo_state_vs_init_disabled(struct ilo_state_vs *vs,
668                            const struct ilo_dev *dev)
669 {
670    struct ilo_state_vs_info info;
671 
672    memset(&info, 0, sizeof(info));
673 
674    return ilo_state_vs_init(vs, dev, &info);
675 }
676 
677 bool
ilo_state_hs_init(struct ilo_state_hs * hs,const struct ilo_dev * dev,const struct ilo_state_hs_info * info)678 ilo_state_hs_init(struct ilo_state_hs *hs,
679                   const struct ilo_dev *dev,
680                   const struct ilo_state_hs_info *info)
681 {
682    bool ret = true;
683 
684    assert(ilo_is_zeroed(hs, sizeof(*hs)));
685 
686    if (ilo_dev_gen(dev) >= ILO_GEN(7))
687       ret &= hs_set_gen7_3DSTATE_HS(hs, dev, info);
688 
689    assert(ret);
690 
691    return ret;
692 }
693 
694 bool
ilo_state_hs_init_disabled(struct ilo_state_hs * hs,const struct ilo_dev * dev)695 ilo_state_hs_init_disabled(struct ilo_state_hs *hs,
696                            const struct ilo_dev *dev)
697 {
698    struct ilo_state_hs_info info;
699 
700    memset(&info, 0, sizeof(info));
701 
702    return ilo_state_hs_init(hs, dev, &info);
703 }
704 
705 bool
ilo_state_ds_init(struct ilo_state_ds * ds,const struct ilo_dev * dev,const struct ilo_state_ds_info * info)706 ilo_state_ds_init(struct ilo_state_ds *ds,
707                   const struct ilo_dev *dev,
708                   const struct ilo_state_ds_info *info)
709 {
710    bool ret = true;
711 
712    assert(ilo_is_zeroed(ds, sizeof(*ds)));
713 
714    if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
715       ret &= ds_set_gen7_3DSTATE_TE(ds, dev, info);
716       ret &= ds_set_gen7_3DSTATE_DS(ds, dev, info);
717    }
718 
719    assert(ret);
720 
721    return ret;
722 }
723 
724 bool
ilo_state_ds_init_disabled(struct ilo_state_ds * ds,const struct ilo_dev * dev)725 ilo_state_ds_init_disabled(struct ilo_state_ds *ds,
726                            const struct ilo_dev *dev)
727 {
728    struct ilo_state_ds_info info;
729 
730    memset(&info, 0, sizeof(info));
731 
732    return ilo_state_ds_init(ds, dev, &info);
733 }
734 
735 bool
ilo_state_gs_init(struct ilo_state_gs * gs,const struct ilo_dev * dev,const struct ilo_state_gs_info * info)736 ilo_state_gs_init(struct ilo_state_gs *gs,
737                   const struct ilo_dev *dev,
738                   const struct ilo_state_gs_info *info)
739 {
740    bool ret = true;
741 
742    assert(ilo_is_zeroed(gs, sizeof(*gs)));
743 
744    if (ilo_dev_gen(dev) >= ILO_GEN(7))
745       ret &= gs_set_gen7_3DSTATE_GS(gs, dev, info);
746    else
747       ret &= gs_set_gen6_3DSTATE_GS(gs, dev, info);
748 
749    assert(ret);
750 
751    return ret;
752 }
753 
754 bool
ilo_state_gs_init_disabled(struct ilo_state_gs * gs,const struct ilo_dev * dev)755 ilo_state_gs_init_disabled(struct ilo_state_gs *gs,
756                            const struct ilo_dev *dev)
757 {
758    struct ilo_state_gs_info info;
759 
760    memset(&info, 0, sizeof(info));
761 
762    return ilo_state_gs_init(gs, dev, &info);
763 }
764