1 /****************************************************************************
2  * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  ***************************************************************************/
23 
24 // llvm redefines DEBUG
25 #pragma push_macro("DEBUG")
26 #undef DEBUG
27 #include "JitManager.h"
28 #pragma pop_macro("DEBUG")
29 
30 #include "common/os.h"
31 #include "jit_api.h"
32 #include "gen_state_llvm.h"
33 #include "core/multisample.h"
34 #include "core/state_funcs.h"
35 
36 #include "gallivm/lp_bld_tgsi.h"
37 #include "util/u_format.h"
38 
39 #include "util/u_memory.h"
40 #include "util/u_inlines.h"
41 #include "util/u_helpers.h"
42 #include "util/u_framebuffer.h"
43 #include "util/u_viewport.h"
44 #include "util/u_prim.h"
45 
46 #include "swr_state.h"
47 #include "swr_context.h"
48 #include "gen_swr_context_llvm.h"
49 #include "swr_screen.h"
50 #include "swr_resource.h"
51 #include "swr_tex_sample.h"
52 #include "swr_scratch.h"
53 #include "swr_shader.h"
54 #include "swr_fence.h"
55 
56 /* These should be pulled out into separate files as necessary
57  * Just initializing everything here to get going. */
58 
59 static void *
swr_create_blend_state(struct pipe_context * pipe,const struct pipe_blend_state * blend)60 swr_create_blend_state(struct pipe_context *pipe,
61                        const struct pipe_blend_state *blend)
62 {
63    struct swr_blend_state *state = CALLOC_STRUCT(swr_blend_state);
64 
65    memcpy(&state->pipe, blend, sizeof(*blend));
66 
67    struct pipe_blend_state *pipe_blend = &state->pipe;
68 
69    for (int target = 0;
70         target < std::min(SWR_NUM_RENDERTARGETS, PIPE_MAX_COLOR_BUFS);
71         target++) {
72 
73       struct pipe_rt_blend_state *rt_blend = &pipe_blend->rt[target];
74       SWR_RENDER_TARGET_BLEND_STATE &blendState =
75          state->blendState.renderTarget[target];
76       RENDER_TARGET_BLEND_COMPILE_STATE &compileState =
77          state->compileState[target];
78 
79       if (target != 0 && !pipe_blend->independent_blend_enable) {
80          memcpy(&compileState,
81                 &state->compileState[0],
82                 sizeof(RENDER_TARGET_BLEND_COMPILE_STATE));
83          continue;
84       }
85 
86       compileState.blendEnable = rt_blend->blend_enable;
87       if (compileState.blendEnable) {
88          compileState.sourceAlphaBlendFactor =
89             swr_convert_blend_factor(rt_blend->alpha_src_factor);
90          compileState.destAlphaBlendFactor =
91             swr_convert_blend_factor(rt_blend->alpha_dst_factor);
92          compileState.sourceBlendFactor =
93             swr_convert_blend_factor(rt_blend->rgb_src_factor);
94          compileState.destBlendFactor =
95             swr_convert_blend_factor(rt_blend->rgb_dst_factor);
96 
97          compileState.colorBlendFunc =
98             swr_convert_blend_func(rt_blend->rgb_func);
99          compileState.alphaBlendFunc =
100             swr_convert_blend_func(rt_blend->alpha_func);
101       }
102       compileState.logicOpEnable = state->pipe.logicop_enable;
103       if (compileState.logicOpEnable) {
104          compileState.logicOpFunc =
105             swr_convert_logic_op(state->pipe.logicop_func);
106       }
107 
108       blendState.writeDisableRed =
109          (rt_blend->colormask & PIPE_MASK_R) ? 0 : 1;
110       blendState.writeDisableGreen =
111          (rt_blend->colormask & PIPE_MASK_G) ? 0 : 1;
112       blendState.writeDisableBlue =
113          (rt_blend->colormask & PIPE_MASK_B) ? 0 : 1;
114       blendState.writeDisableAlpha =
115          (rt_blend->colormask & PIPE_MASK_A) ? 0 : 1;
116 
117       if (rt_blend->colormask == 0)
118          compileState.blendEnable = false;
119    }
120 
121    return state;
122 }
123 
124 static void
swr_bind_blend_state(struct pipe_context * pipe,void * blend)125 swr_bind_blend_state(struct pipe_context *pipe, void *blend)
126 {
127    struct swr_context *ctx = swr_context(pipe);
128 
129    if (ctx->blend == blend)
130       return;
131 
132    ctx->blend = (swr_blend_state *)blend;
133 
134    ctx->dirty |= SWR_NEW_BLEND;
135 }
136 
137 static void
swr_delete_blend_state(struct pipe_context * pipe,void * blend)138 swr_delete_blend_state(struct pipe_context *pipe, void *blend)
139 {
140    FREE(blend);
141 }
142 
143 static void
swr_set_blend_color(struct pipe_context * pipe,const struct pipe_blend_color * color)144 swr_set_blend_color(struct pipe_context *pipe,
145                     const struct pipe_blend_color *color)
146 {
147    struct swr_context *ctx = swr_context(pipe);
148 
149    ctx->blend_color = *color;
150 
151    ctx->dirty |= SWR_NEW_BLEND;
152 }
153 
154 static void
swr_set_stencil_ref(struct pipe_context * pipe,const struct pipe_stencil_ref * ref)155 swr_set_stencil_ref(struct pipe_context *pipe,
156                     const struct pipe_stencil_ref *ref)
157 {
158    struct swr_context *ctx = swr_context(pipe);
159 
160    ctx->stencil_ref = *ref;
161 
162    ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA;
163 }
164 
165 static void *
swr_create_depth_stencil_state(struct pipe_context * pipe,const struct pipe_depth_stencil_alpha_state * depth_stencil)166 swr_create_depth_stencil_state(
167    struct pipe_context *pipe,
168    const struct pipe_depth_stencil_alpha_state *depth_stencil)
169 {
170    struct pipe_depth_stencil_alpha_state *state;
171 
172    state = (pipe_depth_stencil_alpha_state *)mem_dup(depth_stencil,
173                                                      sizeof *depth_stencil);
174 
175    return state;
176 }
177 
178 static void
swr_bind_depth_stencil_state(struct pipe_context * pipe,void * depth_stencil)179 swr_bind_depth_stencil_state(struct pipe_context *pipe, void *depth_stencil)
180 {
181    struct swr_context *ctx = swr_context(pipe);
182 
183    if (ctx->depth_stencil == (pipe_depth_stencil_alpha_state *)depth_stencil)
184       return;
185 
186    ctx->depth_stencil = (pipe_depth_stencil_alpha_state *)depth_stencil;
187 
188    ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA;
189 }
190 
191 static void
swr_delete_depth_stencil_state(struct pipe_context * pipe,void * depth)192 swr_delete_depth_stencil_state(struct pipe_context *pipe, void *depth)
193 {
194    FREE(depth);
195 }
196 
197 
198 static void *
swr_create_rasterizer_state(struct pipe_context * pipe,const struct pipe_rasterizer_state * rast)199 swr_create_rasterizer_state(struct pipe_context *pipe,
200                             const struct pipe_rasterizer_state *rast)
201 {
202    struct pipe_rasterizer_state *state;
203    state = (pipe_rasterizer_state *)mem_dup(rast, sizeof *rast);
204 
205    return state;
206 }
207 
208 static void
swr_bind_rasterizer_state(struct pipe_context * pipe,void * handle)209 swr_bind_rasterizer_state(struct pipe_context *pipe, void *handle)
210 {
211    struct swr_context *ctx = swr_context(pipe);
212    const struct pipe_rasterizer_state *rasterizer =
213       (const struct pipe_rasterizer_state *)handle;
214 
215    if (ctx->rasterizer == (pipe_rasterizer_state *)rasterizer)
216       return;
217 
218    ctx->rasterizer = (pipe_rasterizer_state *)rasterizer;
219 
220    ctx->dirty |= SWR_NEW_RASTERIZER;
221 }
222 
223 static void
swr_delete_rasterizer_state(struct pipe_context * pipe,void * rasterizer)224 swr_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer)
225 {
226    FREE(rasterizer);
227 }
228 
229 
230 static void *
swr_create_sampler_state(struct pipe_context * pipe,const struct pipe_sampler_state * sampler)231 swr_create_sampler_state(struct pipe_context *pipe,
232                          const struct pipe_sampler_state *sampler)
233 {
234    struct pipe_sampler_state *state =
235       (pipe_sampler_state *)mem_dup(sampler, sizeof *sampler);
236 
237    return state;
238 }
239 
240 static void
swr_bind_sampler_states(struct pipe_context * pipe,enum pipe_shader_type shader,unsigned start,unsigned num,void ** samplers)241 swr_bind_sampler_states(struct pipe_context *pipe,
242                         enum pipe_shader_type shader,
243                         unsigned start,
244                         unsigned num,
245                         void **samplers)
246 {
247    struct swr_context *ctx = swr_context(pipe);
248    unsigned i;
249 
250    assert(shader < PIPE_SHADER_TYPES);
251    assert(start + num <= ARRAY_SIZE(ctx->samplers[shader]));
252 
253    /* set the new samplers */
254    ctx->num_samplers[shader] = num;
255    for (i = 0; i < num; i++) {
256       ctx->samplers[shader][start + i] = (pipe_sampler_state *)samplers[i];
257    }
258 
259    ctx->dirty |= SWR_NEW_SAMPLER;
260 }
261 
262 static void
swr_delete_sampler_state(struct pipe_context * pipe,void * sampler)263 swr_delete_sampler_state(struct pipe_context *pipe, void *sampler)
264 {
265    FREE(sampler);
266 }
267 
268 
269 static struct pipe_sampler_view *
swr_create_sampler_view(struct pipe_context * pipe,struct pipe_resource * texture,const struct pipe_sampler_view * templ)270 swr_create_sampler_view(struct pipe_context *pipe,
271                         struct pipe_resource *texture,
272                         const struct pipe_sampler_view *templ)
273 {
274    struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
275 
276    if (view) {
277       *view = *templ;
278       view->reference.count = 1;
279       view->texture = NULL;
280       pipe_resource_reference(&view->texture, texture);
281       view->context = pipe;
282    }
283 
284    return view;
285 }
286 
287 static void
swr_set_sampler_views(struct pipe_context * pipe,enum pipe_shader_type shader,unsigned start,unsigned num,struct pipe_sampler_view ** views)288 swr_set_sampler_views(struct pipe_context *pipe,
289                       enum pipe_shader_type shader,
290                       unsigned start,
291                       unsigned num,
292                       struct pipe_sampler_view **views)
293 {
294    struct swr_context *ctx = swr_context(pipe);
295    uint i;
296 
297    assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
298 
299    assert(shader < PIPE_SHADER_TYPES);
300    assert(start + num <= ARRAY_SIZE(ctx->sampler_views[shader]));
301 
302    /* set the new sampler views */
303    ctx->num_sampler_views[shader] = num;
304    for (i = 0; i < num; i++) {
305       /* Note: we're using pipe_sampler_view_release() here to work around
306        * a possible crash when the old view belongs to another context that
307        * was already destroyed.
308        */
309       pipe_sampler_view_release(pipe, &ctx->sampler_views[shader][start + i]);
310       pipe_sampler_view_reference(&ctx->sampler_views[shader][start + i],
311                                   views[i]);
312    }
313 
314    ctx->dirty |= SWR_NEW_SAMPLER_VIEW;
315 }
316 
317 static void
swr_sampler_view_destroy(struct pipe_context * pipe,struct pipe_sampler_view * view)318 swr_sampler_view_destroy(struct pipe_context *pipe,
319                          struct pipe_sampler_view *view)
320 {
321    pipe_resource_reference(&view->texture, NULL);
322    FREE(view);
323 }
324 
325 static void *
swr_create_vs_state(struct pipe_context * pipe,const struct pipe_shader_state * vs)326 swr_create_vs_state(struct pipe_context *pipe,
327                     const struct pipe_shader_state *vs)
328 {
329    struct swr_vertex_shader *swr_vs = new swr_vertex_shader;
330    if (!swr_vs)
331       return NULL;
332 
333    swr_vs->pipe.tokens = tgsi_dup_tokens(vs->tokens);
334    swr_vs->pipe.stream_output = vs->stream_output;
335 
336    lp_build_tgsi_info(vs->tokens, &swr_vs->info);
337 
338    swr_vs->soState = {0};
339 
340    if (swr_vs->pipe.stream_output.num_outputs) {
341       pipe_stream_output_info *stream_output = &swr_vs->pipe.stream_output;
342 
343       swr_vs->soState.soEnable = true;
344       // soState.rasterizerDisable set on state dirty
345       // soState.streamToRasterizer not used
346 
347       for (uint32_t i = 0; i < stream_output->num_outputs; i++) {
348          unsigned attrib_slot = stream_output->output[i].register_index;
349          attrib_slot = swr_so_adjust_attrib(attrib_slot, swr_vs);
350          swr_vs->soState.streamMasks[stream_output->output[i].stream] |=
351             (1 << attrib_slot);
352       }
353       for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) {
354         swr_vs->soState.streamNumEntries[i] =
355              _mm_popcnt_u32(swr_vs->soState.streamMasks[i]);
356        }
357    }
358 
359    return swr_vs;
360 }
361 
362 static void
swr_bind_vs_state(struct pipe_context * pipe,void * vs)363 swr_bind_vs_state(struct pipe_context *pipe, void *vs)
364 {
365    struct swr_context *ctx = swr_context(pipe);
366 
367    if (ctx->vs == vs)
368       return;
369 
370    ctx->vs = (swr_vertex_shader *)vs;
371    ctx->dirty |= SWR_NEW_VS;
372 }
373 
374 static void
swr_delete_vs_state(struct pipe_context * pipe,void * vs)375 swr_delete_vs_state(struct pipe_context *pipe, void *vs)
376 {
377    struct swr_vertex_shader *swr_vs = (swr_vertex_shader *)vs;
378    FREE((void *)swr_vs->pipe.tokens);
379    struct swr_screen *screen = swr_screen(pipe->screen);
380 
381    /* Defer deletion of vs state */
382    swr_fence_work_delete_vs(screen->flush_fence, swr_vs);
383 }
384 
385 static void *
swr_create_fs_state(struct pipe_context * pipe,const struct pipe_shader_state * fs)386 swr_create_fs_state(struct pipe_context *pipe,
387                     const struct pipe_shader_state *fs)
388 {
389    struct swr_fragment_shader *swr_fs = new swr_fragment_shader;
390    if (!swr_fs)
391       return NULL;
392 
393    swr_fs->pipe.tokens = tgsi_dup_tokens(fs->tokens);
394 
395    lp_build_tgsi_info(fs->tokens, &swr_fs->info);
396 
397    return swr_fs;
398 }
399 
400 
401 static void
swr_bind_fs_state(struct pipe_context * pipe,void * fs)402 swr_bind_fs_state(struct pipe_context *pipe, void *fs)
403 {
404    struct swr_context *ctx = swr_context(pipe);
405 
406    if (ctx->fs == fs)
407       return;
408 
409    ctx->fs = (swr_fragment_shader *)fs;
410    ctx->dirty |= SWR_NEW_FS;
411 }
412 
413 static void
swr_delete_fs_state(struct pipe_context * pipe,void * fs)414 swr_delete_fs_state(struct pipe_context *pipe, void *fs)
415 {
416    struct swr_fragment_shader *swr_fs = (swr_fragment_shader *)fs;
417    FREE((void *)swr_fs->pipe.tokens);
418    struct swr_screen *screen = swr_screen(pipe->screen);
419 
420    /* Defer deleton of fs state */
421    swr_fence_work_delete_fs(screen->flush_fence, swr_fs);
422 }
423 
424 static void *
swr_create_gs_state(struct pipe_context * pipe,const struct pipe_shader_state * gs)425 swr_create_gs_state(struct pipe_context *pipe,
426                     const struct pipe_shader_state *gs)
427 {
428    struct swr_geometry_shader *swr_gs = new swr_geometry_shader;
429    if (!swr_gs)
430       return NULL;
431 
432    swr_gs->pipe.tokens = tgsi_dup_tokens(gs->tokens);
433 
434    lp_build_tgsi_info(gs->tokens, &swr_gs->info);
435 
436    return swr_gs;
437 }
438 
439 
440 static void
swr_bind_gs_state(struct pipe_context * pipe,void * gs)441 swr_bind_gs_state(struct pipe_context *pipe, void *gs)
442 {
443    struct swr_context *ctx = swr_context(pipe);
444 
445    if (ctx->gs == gs)
446       return;
447 
448    ctx->gs = (swr_geometry_shader *)gs;
449    ctx->dirty |= SWR_NEW_GS;
450 }
451 
452 static void
swr_delete_gs_state(struct pipe_context * pipe,void * gs)453 swr_delete_gs_state(struct pipe_context *pipe, void *gs)
454 {
455    struct swr_geometry_shader *swr_gs = (swr_geometry_shader *)gs;
456    FREE((void *)swr_gs->pipe.tokens);
457    struct swr_screen *screen = swr_screen(pipe->screen);
458 
459    /* Defer deleton of fs state */
460    swr_fence_work_delete_gs(screen->flush_fence, swr_gs);
461 }
462 
463 static void
swr_set_constant_buffer(struct pipe_context * pipe,enum pipe_shader_type shader,uint index,const struct pipe_constant_buffer * cb)464 swr_set_constant_buffer(struct pipe_context *pipe,
465                         enum pipe_shader_type shader,
466                         uint index,
467                         const struct pipe_constant_buffer *cb)
468 {
469    struct swr_context *ctx = swr_context(pipe);
470    struct pipe_resource *constants = cb ? cb->buffer : NULL;
471 
472    assert(shader < PIPE_SHADER_TYPES);
473    assert(index < ARRAY_SIZE(ctx->constants[shader]));
474 
475    /* note: reference counting */
476    util_copy_constant_buffer(&ctx->constants[shader][index], cb);
477 
478    if (shader == PIPE_SHADER_VERTEX) {
479       ctx->dirty |= SWR_NEW_VSCONSTANTS;
480    } else if (shader == PIPE_SHADER_FRAGMENT) {
481       ctx->dirty |= SWR_NEW_FSCONSTANTS;
482    } else if (shader == PIPE_SHADER_GEOMETRY) {
483       ctx->dirty |= SWR_NEW_GSCONSTANTS;
484    }
485 
486    if (cb && cb->user_buffer) {
487       pipe_resource_reference(&constants, NULL);
488    }
489 }
490 
491 
492 static void *
swr_create_vertex_elements_state(struct pipe_context * pipe,unsigned num_elements,const struct pipe_vertex_element * attribs)493 swr_create_vertex_elements_state(struct pipe_context *pipe,
494                                  unsigned num_elements,
495                                  const struct pipe_vertex_element *attribs)
496 {
497    struct swr_vertex_element_state *velems;
498    assert(num_elements <= PIPE_MAX_ATTRIBS);
499    velems = new swr_vertex_element_state;
500    if (velems) {
501       memset(&velems->fsState, 0, sizeof(velems->fsState));
502       velems->fsState.bVertexIDOffsetEnable = true;
503       velems->fsState.numAttribs = num_elements;
504       for (unsigned i = 0; i < num_elements; i++) {
505          // XXX: we should do this keyed on the VS usage info
506 
507          const struct util_format_description *desc =
508             util_format_description(attribs[i].src_format);
509 
510          velems->fsState.layout[i].AlignedByteOffset = attribs[i].src_offset;
511          velems->fsState.layout[i].Format =
512             mesa_to_swr_format(attribs[i].src_format);
513          velems->fsState.layout[i].StreamIndex =
514             attribs[i].vertex_buffer_index;
515          velems->fsState.layout[i].InstanceEnable =
516             attribs[i].instance_divisor != 0;
517          velems->fsState.layout[i].ComponentControl0 =
518             desc->channel[0].type != UTIL_FORMAT_TYPE_VOID
519             ? ComponentControl::StoreSrc
520             : ComponentControl::Store0;
521          velems->fsState.layout[i].ComponentControl1 =
522             desc->channel[1].type != UTIL_FORMAT_TYPE_VOID
523             ? ComponentControl::StoreSrc
524             : ComponentControl::Store0;
525          velems->fsState.layout[i].ComponentControl2 =
526             desc->channel[2].type != UTIL_FORMAT_TYPE_VOID
527             ? ComponentControl::StoreSrc
528             : ComponentControl::Store0;
529          velems->fsState.layout[i].ComponentControl3 =
530             desc->channel[3].type != UTIL_FORMAT_TYPE_VOID
531             ? ComponentControl::StoreSrc
532             : ComponentControl::Store1Fp;
533          velems->fsState.layout[i].ComponentPacking = ComponentEnable::XYZW;
534          velems->fsState.layout[i].InstanceAdvancementState =
535             attribs[i].instance_divisor;
536 
537          /* Calculate the pitch of each stream */
538          const SWR_FORMAT_INFO &swr_desc = GetFormatInfo(
539             mesa_to_swr_format(attribs[i].src_format));
540          velems->stream_pitch[attribs[i].vertex_buffer_index] += swr_desc.Bpp;
541 
542          if (attribs[i].instance_divisor != 0) {
543             velems->instanced_bufs |= 1U << attribs[i].vertex_buffer_index;
544             uint32_t *min_instance_div =
545                &velems->min_instance_div[attribs[i].vertex_buffer_index];
546             if (!*min_instance_div ||
547                 attribs[i].instance_divisor < *min_instance_div)
548                *min_instance_div = attribs[i].instance_divisor;
549          }
550       }
551    }
552 
553    return velems;
554 }
555 
556 static void
swr_bind_vertex_elements_state(struct pipe_context * pipe,void * velems)557 swr_bind_vertex_elements_state(struct pipe_context *pipe, void *velems)
558 {
559    struct swr_context *ctx = swr_context(pipe);
560    struct swr_vertex_element_state *swr_velems =
561       (struct swr_vertex_element_state *)velems;
562 
563    ctx->velems = swr_velems;
564    ctx->dirty |= SWR_NEW_VERTEX;
565 }
566 
/*
 * Destroy a vertex element state object with `delete`.
 * The context argument is not used.
 */
static void
swr_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
{
   /* XXX Need to destroy fetch shader? */
   delete (struct swr_vertex_element_state *)velems;
}
575 
576 
577 static void
swr_set_vertex_buffers(struct pipe_context * pipe,unsigned start_slot,unsigned num_elements,const struct pipe_vertex_buffer * buffers)578 swr_set_vertex_buffers(struct pipe_context *pipe,
579                        unsigned start_slot,
580                        unsigned num_elements,
581                        const struct pipe_vertex_buffer *buffers)
582 {
583    struct swr_context *ctx = swr_context(pipe);
584 
585    assert(num_elements <= PIPE_MAX_ATTRIBS);
586 
587    util_set_vertex_buffers_count(ctx->vertex_buffer,
588                                  &ctx->num_vertex_buffers,
589                                  buffers,
590                                  start_slot,
591                                  num_elements);
592 
593    ctx->dirty |= SWR_NEW_VERTEX;
594 }
595 
596 
597 static void
swr_set_polygon_stipple(struct pipe_context * pipe,const struct pipe_poly_stipple * stipple)598 swr_set_polygon_stipple(struct pipe_context *pipe,
599                         const struct pipe_poly_stipple *stipple)
600 {
601    struct swr_context *ctx = swr_context(pipe);
602 
603    ctx->poly_stipple.pipe = *stipple; /* struct copy */
604    ctx->dirty |= SWR_NEW_STIPPLE;
605 }
606 
607 static void
swr_set_clip_state(struct pipe_context * pipe,const struct pipe_clip_state * clip)608 swr_set_clip_state(struct pipe_context *pipe,
609                    const struct pipe_clip_state *clip)
610 {
611    struct swr_context *ctx = swr_context(pipe);
612 
613    ctx->clip = *clip;
614    /* XXX Unimplemented, but prevents crash */
615 
616    ctx->dirty |= SWR_NEW_CLIP;
617 }
618 
619 
620 static void
swr_set_scissor_states(struct pipe_context * pipe,unsigned start_slot,unsigned num_viewports,const struct pipe_scissor_state * scissor)621 swr_set_scissor_states(struct pipe_context *pipe,
622                        unsigned start_slot,
623                        unsigned num_viewports,
624                        const struct pipe_scissor_state *scissor)
625 {
626    struct swr_context *ctx = swr_context(pipe);
627 
628    ctx->scissor = *scissor;
629    ctx->swr_scissor.xmin = scissor->minx;
630    ctx->swr_scissor.xmax = scissor->maxx;
631    ctx->swr_scissor.ymin = scissor->miny;
632    ctx->swr_scissor.ymax = scissor->maxy;
633    ctx->dirty |= SWR_NEW_SCISSOR;
634 }
635 
636 static void
swr_set_viewport_states(struct pipe_context * pipe,unsigned start_slot,unsigned num_viewports,const struct pipe_viewport_state * vpt)637 swr_set_viewport_states(struct pipe_context *pipe,
638                         unsigned start_slot,
639                         unsigned num_viewports,
640                         const struct pipe_viewport_state *vpt)
641 {
642    struct swr_context *ctx = swr_context(pipe);
643 
644    ctx->viewport = *vpt;
645    ctx->dirty |= SWR_NEW_VIEWPORT;
646 }
647 
648 
649 static void
swr_set_framebuffer_state(struct pipe_context * pipe,const struct pipe_framebuffer_state * fb)650 swr_set_framebuffer_state(struct pipe_context *pipe,
651                           const struct pipe_framebuffer_state *fb)
652 {
653    struct swr_context *ctx = swr_context(pipe);
654 
655    boolean changed = !util_framebuffer_state_equal(&ctx->framebuffer, fb);
656 
657    assert(fb->width <= KNOB_GUARDBAND_WIDTH);
658    assert(fb->height <= KNOB_GUARDBAND_HEIGHT);
659 
660    if (changed) {
661       util_copy_framebuffer_state(&ctx->framebuffer, fb);
662 
663       /* 0 and 1 both indicate no msaa.  Core doesn't understand 0 samples */
664       ctx->framebuffer.samples = std::max((ubyte)1, ctx->framebuffer.samples);
665 
666       ctx->dirty |= SWR_NEW_FRAMEBUFFER;
667    }
668 }
669 
670 
671 static void
swr_set_sample_mask(struct pipe_context * pipe,unsigned sample_mask)672 swr_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
673 {
674    struct swr_context *ctx = swr_context(pipe);
675 
676    if (sample_mask != ctx->sample_mask) {
677       ctx->sample_mask = sample_mask;
678       ctx->dirty |= SWR_NEW_RASTERIZER;
679    }
680 }
681 
/*
 * Fixed MSAA sample position table, used by update_derived and
 * get_sample_position. Entries are integer locations on a 16x16 grid.
 *
 * The 1x, 2x, 4x, 8x and 16x patterns are stored back to back, so the
 * pattern for N samples starts at row N - 1.
 */
static const uint8_t swr_sample_positions[][2] = {
   /* 1x */
   { 8,  8},
   /* 2x */
   {12, 12}, { 4,  4},
   /* 4x */
   { 6,  2}, {14,  6}, { 2, 10}, {10, 14},
   /* 8x */
   { 9,  5}, { 7, 11}, {13,  9}, { 5,  3},
   { 3, 13}, { 1,  7}, {11, 15}, {15,  1},
   /* 16x */
   { 9,  9}, { 7,  5}, { 5, 10}, {12,  7},
   { 3,  6}, {10, 13}, {13, 11}, {11,  3},
   { 6, 14}, { 8,  1}, { 4,  2}, { 2, 12},
   { 0,  8}, {15,  4}, {14, 15}, { 1,  0},
};
697 
698 static void
swr_get_sample_position(struct pipe_context * pipe,unsigned sample_count,unsigned sample_index,float * out_value)699 swr_get_sample_position(struct pipe_context *pipe,
700                         unsigned sample_count, unsigned sample_index,
701                         float *out_value)
702 {
703    /* validate sample_count */
704    sample_count = GetNumSamples(GetSampleCount(sample_count));
705 
706    const uint8_t *sample = swr_sample_positions[sample_count-1 + sample_index];
707    out_value[0] = sample[0] / 16.0f;
708    out_value[1] = sample[1] / 16.0f;
709 }
710 
711 
712 /*
713  * Update resource in-use status
714  * All resources bound to color or depth targets marked as WRITE resources.
715  * VBO Vertex/index buffers and texture views marked as READ resources.
716  */
717 void
swr_update_resource_status(struct pipe_context * pipe,const struct pipe_draw_info * p_draw_info)718 swr_update_resource_status(struct pipe_context *pipe,
719                            const struct pipe_draw_info *p_draw_info)
720 {
721    struct swr_context *ctx = swr_context(pipe);
722    struct pipe_framebuffer_state *fb = &ctx->framebuffer;
723 
724    /* colorbuffer targets */
725    if (fb->nr_cbufs)
726       for (uint32_t i = 0; i < fb->nr_cbufs; ++i)
727          if (fb->cbufs[i])
728             swr_resource_write(fb->cbufs[i]->texture);
729 
730    /* depth/stencil target */
731    if (fb->zsbuf)
732       swr_resource_write(fb->zsbuf->texture);
733 
734    /* VBO vertex buffers */
735    for (uint32_t i = 0; i < ctx->num_vertex_buffers; i++) {
736       struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
737       if (!vb->is_user_buffer)
738          swr_resource_read(vb->buffer.resource);
739    }
740 
741    /* VBO index buffer */
742    if (p_draw_info && p_draw_info->index_size) {
743       if (!p_draw_info->has_user_indices)
744          swr_resource_read(p_draw_info->index.resource);
745    }
746 
747    /* transform feedback buffers */
748    for (uint32_t i = 0; i < ctx->num_so_targets; i++) {
749       struct pipe_stream_output_target *target = ctx->so_targets[i];
750       if (target && target->buffer)
751          swr_resource_write(target->buffer);
752    }
753 
754    /* texture sampler views */
755    for (uint32_t j : {PIPE_SHADER_VERTEX, PIPE_SHADER_FRAGMENT}) {
756       for (uint32_t i = 0; i < ctx->num_sampler_views[j]; i++) {
757          struct pipe_sampler_view *view = ctx->sampler_views[j][i];
758          if (view)
759             swr_resource_read(view->texture);
760       }
761    }
762 
763    /* constant buffers */
764    for (uint32_t j : {PIPE_SHADER_VERTEX, PIPE_SHADER_FRAGMENT}) {
765       for (uint32_t i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
766          struct pipe_constant_buffer *cb = &ctx->constants[j][i];
767          if (cb->buffer)
768             swr_resource_read(cb->buffer);
769       }
770    }
771 }
772 
773 static void
swr_update_texture_state(struct swr_context * ctx,enum pipe_shader_type shader_type,unsigned num_sampler_views,swr_jit_texture * textures)774 swr_update_texture_state(struct swr_context *ctx,
775                          enum pipe_shader_type shader_type,
776                          unsigned num_sampler_views,
777                          swr_jit_texture *textures)
778 {
779    for (unsigned i = 0; i < num_sampler_views; i++) {
780       struct pipe_sampler_view *view =
781          ctx->sampler_views[shader_type][i];
782       struct swr_jit_texture *jit_tex = &textures[i];
783 
784       memset(jit_tex, 0, sizeof(*jit_tex));
785       if (view) {
786          struct pipe_resource *res = view->texture;
787          struct swr_resource *swr_res = swr_resource(res);
788          SWR_SURFACE_STATE *swr = &swr_res->swr;
789          size_t *mip_offsets = swr_res->mip_offsets;
790          if (swr_res->has_depth && swr_res->has_stencil &&
791             !util_format_has_depth(util_format_description(view->format))) {
792             swr = &swr_res->secondary;
793             mip_offsets = swr_res->secondary_mip_offsets;
794          }
795 
796          jit_tex->width = res->width0;
797          jit_tex->height = res->height0;
798          jit_tex->base_ptr = (uint8_t*)swr->xpBaseAddress;
799          if (view->target != PIPE_BUFFER) {
800             jit_tex->first_level = view->u.tex.first_level;
801             jit_tex->last_level = view->u.tex.last_level;
802             if (view->target == PIPE_TEXTURE_3D)
803                jit_tex->depth = res->depth0;
804             else
805                jit_tex->depth =
806                   view->u.tex.last_layer - view->u.tex.first_layer + 1;
807             jit_tex->base_ptr += view->u.tex.first_layer *
808                swr->qpitch * swr->pitch;
809          } else {
810             unsigned view_blocksize = util_format_get_blocksize(view->format);
811             jit_tex->base_ptr += view->u.buf.offset;
812             jit_tex->width = view->u.buf.size / view_blocksize;
813             jit_tex->depth = 1;
814          }
815 
816          for (unsigned level = jit_tex->first_level;
817               level <= jit_tex->last_level;
818               level++) {
819             jit_tex->row_stride[level] = swr->pitch;
820             jit_tex->img_stride[level] = swr->qpitch * swr->pitch;
821             jit_tex->mip_offsets[level] = mip_offsets[level];
822          }
823       }
824    }
825 }
826 
827 static void
swr_update_sampler_state(struct swr_context * ctx,enum pipe_shader_type shader_type,unsigned num_samplers,swr_jit_sampler * samplers)828 swr_update_sampler_state(struct swr_context *ctx,
829                          enum pipe_shader_type shader_type,
830                          unsigned num_samplers,
831                          swr_jit_sampler *samplers)
832 {
833    for (unsigned i = 0; i < num_samplers; i++) {
834       const struct pipe_sampler_state *sampler =
835          ctx->samplers[shader_type][i];
836 
837       if (sampler) {
838          samplers[i].min_lod = sampler->min_lod;
839          samplers[i].max_lod = sampler->max_lod;
840          samplers[i].lod_bias = sampler->lod_bias;
841          COPY_4V(samplers[i].border_color, sampler->border_color.f);
842       }
843    }
844 }
845 
846 static void
swr_update_constants(struct swr_context * ctx,enum pipe_shader_type shaderType)847 swr_update_constants(struct swr_context *ctx, enum pipe_shader_type shaderType)
848 {
849    swr_draw_context *pDC = &ctx->swrDC;
850 
851    const float **constant;
852    uint32_t *num_constants;
853    struct swr_scratch_space *scratch;
854 
855    switch (shaderType) {
856    case PIPE_SHADER_VERTEX:
857       constant = pDC->constantVS;
858       num_constants = pDC->num_constantsVS;
859       scratch = &ctx->scratch->vs_constants;
860       break;
861    case PIPE_SHADER_FRAGMENT:
862       constant = pDC->constantFS;
863       num_constants = pDC->num_constantsFS;
864       scratch = &ctx->scratch->fs_constants;
865       break;
866    case PIPE_SHADER_GEOMETRY:
867       constant = pDC->constantGS;
868       num_constants = pDC->num_constantsGS;
869       scratch = &ctx->scratch->gs_constants;
870       break;
871    default:
872       debug_printf("Unsupported shader type constants\n");
873       return;
874    }
875 
876    for (UINT i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
877       const pipe_constant_buffer *cb = &ctx->constants[shaderType][i];
878       num_constants[i] = cb->buffer_size;
879       if (cb->buffer) {
880          constant[i] =
881             (const float *)(swr_resource_data(cb->buffer) +
882                             cb->buffer_offset);
883       } else {
884          /* Need to copy these constants to scratch space */
885          if (cb->user_buffer && cb->buffer_size) {
886             const void *ptr =
887                ((const uint8_t *)cb->user_buffer + cb->buffer_offset);
888             uint32_t size = AlignUp(cb->buffer_size, 4);
889             ptr = swr_copy_to_scratch_space(ctx, scratch, ptr, size);
890             constant[i] = (const float *)ptr;
891          }
892       }
893    }
894 }
895 
896 static bool
swr_change_rt(struct swr_context * ctx,unsigned attachment,const struct pipe_surface * sf)897 swr_change_rt(struct swr_context *ctx,
898               unsigned attachment,
899               const struct pipe_surface *sf)
900 {
901    swr_draw_context *pDC = &ctx->swrDC;
902    struct SWR_SURFACE_STATE *rt = &pDC->renderTargets[attachment];
903 
904    /* Do nothing if the render target hasn't changed */
905    if ((!sf || !sf->texture) && (void*)(rt->xpBaseAddress) == nullptr)
906       return false;
907 
908    /* Deal with disabling RT up front */
909    if (!sf || !sf->texture) {
910       /* If detaching attachment, mark tiles as RESOLVED so core
911        * won't try to load from non-existent target. */
912       swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_RESOLVED);
913       *rt = {0};
914       return true;
915    }
916 
917    const struct swr_resource *swr = swr_resource(sf->texture);
918    const SWR_SURFACE_STATE *swr_surface = &swr->swr;
919    SWR_FORMAT fmt = mesa_to_swr_format(sf->format);
920 
921    if (attachment == SWR_ATTACHMENT_STENCIL && swr->secondary.xpBaseAddress) {
922       swr_surface = &swr->secondary;
923       fmt = swr_surface->format;
924    }
925 
926    if (rt->xpBaseAddress == swr_surface->xpBaseAddress &&
927        rt->format == fmt &&
928        rt->lod == sf->u.tex.level &&
929        rt->arrayIndex == sf->u.tex.first_layer)
930       return false;
931 
932    bool need_fence = false;
933 
934    /* StoreTile for changed target */
935    if (rt->xpBaseAddress) {
936       /* If changing attachment to a new target, mark tiles as
937        * INVALID so they are reloaded from surface. */
938       swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_INVALID);
939       need_fence = true;
940    } else {
941       /* if no previous attachment, invalidate tiles that may be marked
942        * RESOLVED because of an old attachment */
943       swr_invalidate_render_target(&ctx->pipe, attachment, sf->width, sf->height);
944       /* no need to set fence here */
945    }
946 
947    /* Make new attachment */
948    *rt = *swr_surface;
949    rt->format = fmt;
950    rt->lod = sf->u.tex.level;
951    rt->arrayIndex = sf->u.tex.first_layer;
952 
953    return need_fence;
954 }
955 
956 /*
957  * for cases where resources are shared between contexts, invalidate
958  * this ctx's resource. so it can be fetched fresh.  Old ctx's resource
959  * is already stored during a flush
960  */
961 static inline void
swr_invalidate_buffers_after_ctx_change(struct pipe_context * pipe)962 swr_invalidate_buffers_after_ctx_change(struct pipe_context *pipe)
963 {
964    struct swr_context *ctx = swr_context(pipe);
965 
966    for (uint32_t i = 0; i < ctx->framebuffer.nr_cbufs; i++) {
967       struct pipe_surface *cb = ctx->framebuffer.cbufs[i];
968       if (cb) {
969          struct swr_resource *res = swr_resource(cb->texture);
970          if (res->curr_pipe != pipe) {
971             /* if curr_pipe is NULL (first use), status should not be WRITE */
972             assert(res->curr_pipe || !(res->status & SWR_RESOURCE_WRITE));
973             if (res->status & SWR_RESOURCE_WRITE) {
974                swr_invalidate_render_target(pipe, i, cb->width, cb->height);
975             }
976          }
977          res->curr_pipe = pipe;
978       }
979    }
980    if (ctx->framebuffer.zsbuf) {
981       struct pipe_surface *zb = ctx->framebuffer.zsbuf;
982       if (zb) {
983          struct swr_resource *res = swr_resource(zb->texture);
984          if (res->curr_pipe != pipe) {
985             /* if curr_pipe is NULL (first use), status should not be WRITE */
986             assert(res->curr_pipe || !(res->status & SWR_RESOURCE_WRITE));
987             if (res->status & SWR_RESOURCE_WRITE) {
988                swr_invalidate_render_target(pipe, SWR_ATTACHMENT_DEPTH, zb->width, zb->height);
989                swr_invalidate_render_target(pipe, SWR_ATTACHMENT_STENCIL, zb->width, zb->height);
990             }
991          }
992          res->curr_pipe = pipe;
993       }
994    }
995 }
996 
997 static inline void
swr_user_vbuf_range(const struct pipe_draw_info * info,const struct swr_vertex_element_state * velems,const struct pipe_vertex_buffer * vb,uint32_t i,uint32_t * totelems,uint32_t * base,uint32_t * size)998 swr_user_vbuf_range(const struct pipe_draw_info *info,
999                     const struct swr_vertex_element_state *velems,
1000                     const struct pipe_vertex_buffer *vb,
1001                     uint32_t i,
1002                     uint32_t *totelems,
1003                     uint32_t *base,
1004                     uint32_t *size)
1005 {
1006    /* FIXME: The size is too large - we don't access the full extra stride. */
1007    unsigned elems;
1008    if (velems->instanced_bufs & (1U << i)) {
1009       elems = info->instance_count / velems->min_instance_div[i] + 1;
1010       *totelems = info->start_instance + elems;
1011       *base = info->start_instance * vb->stride;
1012       *size = elems * vb->stride;
1013    } else if (vb->stride) {
1014       elems = info->max_index - info->min_index + 1;
1015       *totelems = (info->max_index + info->index_bias) + 1;
1016       *base = (info->min_index + info->index_bias) * vb->stride;
1017       *size = elems * vb->stride;
1018    } else {
1019       *totelems = 1;
1020       *base = 0;
1021       *size = velems->stream_pitch[i];
1022    }
1023 }
1024 
1025 static void
swr_update_poly_stipple(struct swr_context * ctx)1026 swr_update_poly_stipple(struct swr_context *ctx)
1027 {
1028    struct swr_draw_context *pDC = &ctx->swrDC;
1029 
1030    assert(sizeof(ctx->poly_stipple.pipe.stipple) == sizeof(pDC->polyStipple));
1031    memcpy(pDC->polyStipple,
1032           ctx->poly_stipple.pipe.stipple,
1033           sizeof(ctx->poly_stipple.pipe.stipple));
1034 }
1035 
1036 void
swr_update_derived(struct pipe_context * pipe,const struct pipe_draw_info * p_draw_info)1037 swr_update_derived(struct pipe_context *pipe,
1038                    const struct pipe_draw_info *p_draw_info)
1039 {
1040    struct swr_context *ctx = swr_context(pipe);
1041    struct swr_screen *screen = swr_screen(pipe->screen);
1042 
1043    /* When called from swr_clear (p_draw_info = null), set any null
1044     * state-objects to the dummy state objects to prevent nullptr dereference
1045     * in validation below.
1046     *
1047     * Important that this remains static for zero initialization.  These
1048     * aren't meant to be proper state objects, just empty structs. They will
1049     * not be written to.
1050     *
1051     * Shaders can't be part of the union since they contain std::unordered_map
1052     */
1053    static struct {
1054       union {
1055          struct pipe_rasterizer_state rasterizer;
1056          struct pipe_depth_stencil_alpha_state depth_stencil;
1057          struct swr_blend_state blend;
1058       } state;
1059       struct swr_vertex_shader vs;
1060       struct swr_fragment_shader fs;
1061    } swr_dummy;
1062 
1063    if (!p_draw_info) {
1064       if (!ctx->rasterizer)
1065          ctx->rasterizer = &swr_dummy.state.rasterizer;
1066       if (!ctx->depth_stencil)
1067          ctx->depth_stencil = &swr_dummy.state.depth_stencil;
1068       if (!ctx->blend)
1069          ctx->blend = &swr_dummy.state.blend;
1070       if (!ctx->vs)
1071          ctx->vs = &swr_dummy.vs;
1072       if (!ctx->fs)
1073          ctx->fs = &swr_dummy.fs;
1074    }
1075 
1076    /* Update screen->pipe to current pipe context. */
1077    screen->pipe = pipe;
1078 
1079    /* Any state that requires dirty flags to be re-triggered sets this mask */
1080    /* For example, user_buffer vertex and index buffers. */
1081    unsigned post_update_dirty_flags = 0;
1082 
1083    /* bring resources that changed context up-to-date */
1084    swr_invalidate_buffers_after_ctx_change(pipe);
1085 
1086    /* Render Targets */
1087    if (ctx->dirty & SWR_NEW_FRAMEBUFFER) {
1088       struct pipe_framebuffer_state *fb = &ctx->framebuffer;
1089       const struct util_format_description *desc = NULL;
1090       bool need_fence = false;
1091 
1092       /* colorbuffer targets */
1093       if (fb->nr_cbufs) {
1094          for (unsigned i = 0; i < fb->nr_cbufs; ++i)
1095             need_fence |= swr_change_rt(
1096                   ctx, SWR_ATTACHMENT_COLOR0 + i, fb->cbufs[i]);
1097       }
1098       for (unsigned i = fb->nr_cbufs; i < SWR_NUM_RENDERTARGETS; ++i)
1099          need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_COLOR0 + i, NULL);
1100 
1101       /* depth/stencil target */
1102       if (fb->zsbuf)
1103          desc = util_format_description(fb->zsbuf->format);
1104       if (fb->zsbuf && util_format_has_depth(desc))
1105          need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, fb->zsbuf);
1106       else
1107          need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, NULL);
1108 
1109       if (fb->zsbuf && util_format_has_stencil(desc))
1110          need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, fb->zsbuf);
1111       else
1112          need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, NULL);
1113 
1114       /* This fence ensures any attachment changes are resolved before the
1115        * next draw */
1116       if (need_fence)
1117          swr_fence_submit(ctx, screen->flush_fence);
1118    }
1119 
1120    /* Raster state */
1121    if (ctx->dirty & (SWR_NEW_RASTERIZER |
1122                      SWR_NEW_VS | // clipping
1123                      SWR_NEW_FRAMEBUFFER)) {
1124       pipe_rasterizer_state *rasterizer = ctx->rasterizer;
1125       pipe_framebuffer_state *fb = &ctx->framebuffer;
1126 
1127       SWR_RASTSTATE *rastState = &ctx->derived.rastState;
1128       rastState->cullMode = swr_convert_cull_mode(rasterizer->cull_face);
1129       rastState->frontWinding = rasterizer->front_ccw
1130          ? SWR_FRONTWINDING_CCW
1131          : SWR_FRONTWINDING_CW;
1132       rastState->scissorEnable = rasterizer->scissor;
1133       rastState->pointSize = rasterizer->point_size > 0.0f
1134          ? rasterizer->point_size
1135          : 1.0f;
1136       rastState->lineWidth = rasterizer->line_width > 0.0f
1137          ? rasterizer->line_width
1138          : 1.0f;
1139 
1140       rastState->pointParam = rasterizer->point_size_per_vertex;
1141 
1142       rastState->pointSpriteEnable = rasterizer->sprite_coord_enable;
1143       rastState->pointSpriteTopOrigin =
1144          rasterizer->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT;
1145 
1146       /* If SWR_MSAA_FORCE_ENABLE is set, turn msaa on */
1147       if (screen->msaa_force_enable && !rasterizer->multisample) {
1148          /* Force enable and use the value the surface was created with */
1149          rasterizer->multisample = true;
1150          fb->samples = swr_resource(fb->cbufs[0]->texture)->swr.numSamples;
1151          fprintf(stderr,"msaa force enable: %d samples\n", fb->samples);
1152       }
1153 
1154       rastState->sampleCount = GetSampleCount(fb->samples);
1155       rastState->forcedSampleCount = false;
1156       rastState->bIsCenterPattern = !rasterizer->multisample;
1157       rastState->pixelLocation = SWR_PIXEL_LOCATION_CENTER;
1158 
1159       /* Only initialize sample positions if msaa is enabled */
1160       if (rasterizer->multisample) {
1161          for (uint32_t i = 0; i < fb->samples; i++) {
1162             const uint8_t *sample = swr_sample_positions[fb->samples-1 + i];
1163             rastState->samplePositions.SetXi(i, sample[0] << 4);
1164             rastState->samplePositions.SetYi(i, sample[1] << 4);
1165             rastState->samplePositions.SetX (i, sample[0] / 16.0f);
1166             rastState->samplePositions.SetY (i, sample[1] / 16.0f);
1167          }
1168          rastState->samplePositions.PrecalcSampleData(fb->samples);
1169       }
1170 
1171       bool do_offset = false;
1172       switch (rasterizer->fill_front) {
1173       case PIPE_POLYGON_MODE_FILL:
1174          do_offset = rasterizer->offset_tri;
1175          break;
1176       case PIPE_POLYGON_MODE_LINE:
1177          do_offset = rasterizer->offset_line;
1178          break;
1179       case PIPE_POLYGON_MODE_POINT:
1180          do_offset = rasterizer->offset_point;
1181          break;
1182       }
1183 
1184       if (do_offset) {
1185          rastState->depthBias = rasterizer->offset_units;
1186          rastState->slopeScaledDepthBias = rasterizer->offset_scale;
1187          rastState->depthBiasClamp = rasterizer->offset_clamp;
1188       } else {
1189          rastState->depthBias = 0;
1190          rastState->slopeScaledDepthBias = 0;
1191          rastState->depthBiasClamp = 0;
1192       }
1193 
1194       /* translate polygon mode, at least for the front==back case */
1195       rastState->fillMode = swr_convert_fill_mode(rasterizer->fill_front);
1196 
1197       struct pipe_surface *zb = fb->zsbuf;
1198       if (zb && swr_resource(zb->texture)->has_depth)
1199          rastState->depthFormat = swr_resource(zb->texture)->swr.format;
1200 
1201       rastState->depthClipEnable = rasterizer->depth_clip;
1202       rastState->clipHalfZ = rasterizer->clip_halfz;
1203 
1204       ctx->api.pfnSwrSetRastState(ctx->swrContext, rastState);
1205    }
1206 
1207    /* Viewport */
1208    if (ctx->dirty & (SWR_NEW_VIEWPORT | SWR_NEW_FRAMEBUFFER
1209                      | SWR_NEW_RASTERIZER)) {
1210       pipe_viewport_state *state = &ctx->viewport;
1211       pipe_framebuffer_state *fb = &ctx->framebuffer;
1212       pipe_rasterizer_state *rasterizer = ctx->rasterizer;
1213 
1214       SWR_VIEWPORT *vp = &ctx->derived.vp;
1215       SWR_VIEWPORT_MATRICES *vpm = &ctx->derived.vpm;
1216 
1217       vp->x = state->translate[0] - state->scale[0];
1218       vp->width = 2 * state->scale[0];
1219       vp->y = state->translate[1] - fabs(state->scale[1]);
1220       vp->height = 2 * fabs(state->scale[1]);
1221       util_viewport_zmin_zmax(state, rasterizer->clip_halfz,
1222                               &vp->minZ, &vp->maxZ);
1223 
1224       vpm->m00[0] = state->scale[0];
1225       vpm->m11[0] = state->scale[1];
1226       vpm->m22[0] = state->scale[2];
1227       vpm->m30[0] = state->translate[0];
1228       vpm->m31[0] = state->translate[1];
1229       vpm->m32[0] = state->translate[2];
1230 
1231       /* Now that the matrix is calculated, clip the view coords to screen
1232        * size.  OpenGL allows for -ve x,y in the viewport. */
1233       if (vp->x < 0.0f) {
1234          vp->width += vp->x;
1235          vp->x = 0.0f;
1236       }
1237       if (vp->y < 0.0f) {
1238          vp->height += vp->y;
1239          vp->y = 0.0f;
1240       }
1241       vp->width = std::min(vp->width, (float)fb->width - vp->x);
1242       vp->height = std::min(vp->height, (float)fb->height - vp->y);
1243 
1244       ctx->api.pfnSwrSetViewports(ctx->swrContext, 1, vp, vpm);
1245    }
1246 
1247    /* When called from swr_clear (p_draw_info = null), render targets,
1248     * rasterState and viewports (dependent on render targets) are the only
1249     * necessary validation.  Defer remaining validation by setting
1250     * post_update_dirty_flags and clear all dirty flags.  BackendState is
1251     * still unconditionally validated below */
1252    if (!p_draw_info) {
1253       post_update_dirty_flags = ctx->dirty & ~(SWR_NEW_FRAMEBUFFER |
1254                                                SWR_NEW_RASTERIZER |
1255                                                SWR_NEW_VIEWPORT);
1256       ctx->dirty = 0;
1257    }
1258 
1259    /* Scissor */
1260    if (ctx->dirty & SWR_NEW_SCISSOR) {
1261       ctx->api.pfnSwrSetScissorRects(ctx->swrContext, 1, &ctx->swr_scissor);
1262    }
1263 
1264    /* Set vertex & index buffers */
1265    if (ctx->dirty & SWR_NEW_VERTEX) {
1266       const struct pipe_draw_info &info = *p_draw_info;
1267 
1268       /* vertex buffers */
1269       SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS];
1270       for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
1271          uint32_t size, pitch, elems, partial_inbounds;
1272          uint32_t min_vertex_index;
1273          const uint8_t *p_data;
1274          struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
1275 
1276          pitch = vb->stride;
1277          if (!vb->is_user_buffer) {
1278             /* VBO */
1279             if (!pitch) {
1280                /* If pitch=0 (ie vb->stride), buffer contains a single
1281                 * constant attribute.  Use the stream_pitch which was
1282                 * calculated during creation of vertex_elements_state for the
1283                 * size of the attribute. */
1284                size = ctx->velems->stream_pitch[i];
1285                elems = 1;
1286                partial_inbounds = 0;
1287                min_vertex_index = 0;
1288             } else {
1289                /* size is based on buffer->width0 rather than info.max_index
1290                 * to prevent having to validate VBO on each draw. */
1291                size = vb->buffer.resource->width0;
1292                elems = size / pitch;
1293                partial_inbounds = size % pitch;
1294                min_vertex_index = 0;
1295             }
1296 
1297             p_data = swr_resource_data(vb->buffer.resource) + vb->buffer_offset;
1298          } else {
1299             /* Client buffer
1300              * client memory is one-time use, re-trigger SWR_NEW_VERTEX to
1301              * revalidate on each draw */
1302             post_update_dirty_flags |= SWR_NEW_VERTEX;
1303 
1304             uint32_t base;
1305             swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size);
1306             partial_inbounds = 0;
1307             min_vertex_index = info.min_index + info.index_bias;
1308 
1309             size = AlignUp(size, 4);
1310             /* If size of client memory copy is too large, don't copy. The
1311              * draw will access user-buffer directly and then block.  This is
1312              * faster than queuing many large client draws. */
1313             if (size >= screen->client_copy_limit) {
1314                post_update_dirty_flags |= SWR_LARGE_CLIENT_DRAW;
1315                p_data = (const uint8_t *) vb->buffer.user;
1316             } else {
1317                /* Copy only needed vertices to scratch space */
1318                const void *ptr = (const uint8_t *) vb->buffer.user + base;
1319                ptr = (uint8_t *)swr_copy_to_scratch_space(
1320                      ctx, &ctx->scratch->vertex_buffer, ptr, size);
1321                p_data = (const uint8_t *)ptr - base;
1322             }
1323          }
1324 
1325          swrVertexBuffers[i] = {0};
1326          swrVertexBuffers[i].index = i;
1327          swrVertexBuffers[i].pitch = pitch;
1328          swrVertexBuffers[i].pData = p_data;
1329          swrVertexBuffers[i].size = size;
1330          swrVertexBuffers[i].minVertex = min_vertex_index;
1331          swrVertexBuffers[i].maxVertex = elems;
1332          swrVertexBuffers[i].partialInboundsSize = partial_inbounds;
1333       }
1334 
1335       ctx->api.pfnSwrSetVertexBuffers(
1336          ctx->swrContext, ctx->num_vertex_buffers, swrVertexBuffers);
1337 
1338       /* index buffer, if required (info passed in by swr_draw_vbo) */
1339       SWR_FORMAT index_type = R32_UINT; /* Default for non-indexed draws */
1340       if (info.index_size) {
1341          const uint8_t *p_data;
1342          uint32_t size, pitch;
1343 
1344          pitch = info.index_size ? info.index_size : sizeof(uint32_t);
1345          index_type = swr_convert_index_type(pitch);
1346 
1347          if (!info.has_user_indices) {
1348             /* VBO
1349              * size is based on buffer->width0 rather than info.count
1350              * to prevent having to validate VBO on each draw */
1351             size = info.index.resource->width0;
1352             p_data = swr_resource_data(info.index.resource);
1353          } else {
1354             /* Client buffer
1355              * client memory is one-time use, re-trigger SWR_NEW_VERTEX to
1356              * revalidate on each draw */
1357             post_update_dirty_flags |= SWR_NEW_VERTEX;
1358 
1359             size = info.count * pitch;
1360             size = AlignUp(size, 4);
1361             /* If size of client memory copy is too large, don't copy. The
1362              * draw will access user-buffer directly and then block.  This is
1363              * faster than queuing many large client draws. */
1364             if (size >= screen->client_copy_limit) {
1365                post_update_dirty_flags |= SWR_LARGE_CLIENT_DRAW;
1366                p_data = (const uint8_t *) info.index.user;
1367             } else {
1368                /* Copy indices to scratch space */
1369                const void *ptr = info.index.user;
1370                ptr = swr_copy_to_scratch_space(
1371                      ctx, &ctx->scratch->index_buffer, ptr, size);
1372                p_data = (const uint8_t *)ptr;
1373             }
1374          }
1375 
1376          SWR_INDEX_BUFFER_STATE swrIndexBuffer;
1377          swrIndexBuffer.format = swr_convert_index_type(info.index_size);
1378          swrIndexBuffer.pIndices = p_data;
1379          swrIndexBuffer.size = size;
1380 
1381          ctx->api.pfnSwrSetIndexBuffer(ctx->swrContext, &swrIndexBuffer);
1382       }
1383 
1384       struct swr_vertex_element_state *velems = ctx->velems;
1385       if (velems && velems->fsState.indexType != index_type) {
1386          velems->fsFunc = NULL;
1387          velems->fsState.indexType = index_type;
1388       }
1389    }
1390 
1391    /* GeometryShader */
1392    if (ctx->dirty & (SWR_NEW_GS |
1393                      SWR_NEW_VS |
1394                      SWR_NEW_SAMPLER |
1395                      SWR_NEW_SAMPLER_VIEW)) {
1396       if (ctx->gs) {
1397          swr_jit_gs_key key;
1398          swr_generate_gs_key(key, ctx, ctx->gs);
1399          auto search = ctx->gs->map.find(key);
1400          PFN_GS_FUNC func;
1401          if (search != ctx->gs->map.end()) {
1402             func = search->second->shader;
1403          } else {
1404             func = swr_compile_gs(ctx, key);
1405          }
1406          ctx->api.pfnSwrSetGsFunc(ctx->swrContext, func);
1407 
1408          /* JIT sampler state */
1409          if (ctx->dirty & SWR_NEW_SAMPLER) {
1410             swr_update_sampler_state(ctx,
1411                                      PIPE_SHADER_GEOMETRY,
1412                                      key.nr_samplers,
1413                                      ctx->swrDC.samplersGS);
1414          }
1415 
1416          /* JIT sampler view state */
1417          if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
1418             swr_update_texture_state(ctx,
1419                                      PIPE_SHADER_GEOMETRY,
1420                                      key.nr_sampler_views,
1421                                      ctx->swrDC.texturesGS);
1422          }
1423 
1424          ctx->api.pfnSwrSetGsState(ctx->swrContext, &ctx->gs->gsState);
1425       } else {
1426          SWR_GS_STATE state = { 0 };
1427          ctx->api.pfnSwrSetGsState(ctx->swrContext, &state);
1428          ctx->api.pfnSwrSetGsFunc(ctx->swrContext, NULL);
1429       }
1430    }
1431 
1432    /* VertexShader */
1433    if (ctx->dirty & (SWR_NEW_VS |
1434                      SWR_NEW_RASTERIZER | // for clip planes
1435                      SWR_NEW_SAMPLER |
1436                      SWR_NEW_SAMPLER_VIEW |
1437                      SWR_NEW_FRAMEBUFFER)) {
1438       swr_jit_vs_key key;
1439       swr_generate_vs_key(key, ctx, ctx->vs);
1440       auto search = ctx->vs->map.find(key);
1441       PFN_VERTEX_FUNC func;
1442       if (search != ctx->vs->map.end()) {
1443          func = search->second->shader;
1444       } else {
1445          func = swr_compile_vs(ctx, key);
1446       }
1447       ctx->api.pfnSwrSetVertexFunc(ctx->swrContext, func);
1448 
1449       /* JIT sampler state */
1450       if (ctx->dirty & SWR_NEW_SAMPLER) {
1451          swr_update_sampler_state(ctx,
1452                                   PIPE_SHADER_VERTEX,
1453                                   key.nr_samplers,
1454                                   ctx->swrDC.samplersVS);
1455       }
1456 
1457       /* JIT sampler view state */
1458       if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
1459          swr_update_texture_state(ctx,
1460                                   PIPE_SHADER_VERTEX,
1461                                   key.nr_sampler_views,
1462                                   ctx->swrDC.texturesVS);
1463       }
1464    }
1465 
1466    /* work around the fact that poly stipple also affects lines */
1467    /* and points, since we rasterize them as triangles, too */
1468    /* Has to be before fragment shader, since it sets SWR_NEW_FS */
1469    if (p_draw_info) {
1470       bool new_prim_is_poly =
1471          (u_reduced_prim(p_draw_info->mode) == PIPE_PRIM_TRIANGLES) &&
1472          (ctx->derived.rastState.fillMode == SWR_FILLMODE_SOLID);
1473       if (new_prim_is_poly != ctx->poly_stipple.prim_is_poly) {
1474          ctx->dirty |= SWR_NEW_FS;
1475          ctx->poly_stipple.prim_is_poly = new_prim_is_poly;
1476       }
1477    }
1478 
1479    /* FragmentShader */
1480    if (ctx->dirty & (SWR_NEW_FS |
1481                      SWR_NEW_VS |
1482                      SWR_NEW_GS |
1483                      SWR_NEW_RASTERIZER |
1484                      SWR_NEW_SAMPLER |
1485                      SWR_NEW_SAMPLER_VIEW |
1486                      SWR_NEW_FRAMEBUFFER)) {
1487       swr_jit_fs_key key;
1488       swr_generate_fs_key(key, ctx, ctx->fs);
1489       auto search = ctx->fs->map.find(key);
1490       PFN_PIXEL_KERNEL func;
1491       if (search != ctx->fs->map.end()) {
1492          func = search->second->shader;
1493       } else {
1494          func = swr_compile_fs(ctx, key);
1495       }
1496       SWR_PS_STATE psState = {0};
1497       psState.pfnPixelShader = func;
1498       psState.killsPixel = ctx->fs->info.base.uses_kill;
1499       psState.inputCoverage = SWR_INPUT_COVERAGE_NORMAL;
1500       psState.writesODepth = ctx->fs->info.base.writes_z;
1501       psState.usesSourceDepth = ctx->fs->info.base.reads_z;
1502       psState.shadingRate = SWR_SHADING_RATE_PIXEL;
1503       psState.renderTargetMask = (1 << ctx->framebuffer.nr_cbufs) - 1;
1504       psState.posOffset = SWR_PS_POSITION_SAMPLE_NONE;
1505       uint32_t barycentricsMask = 0;
1506 #if 0
1507       // when we switch to mesa-master
1508       if (ctx->fs->info.base.uses_persp_center ||
1509           ctx->fs->info.base.uses_linear_center)
1510          barycentricsMask |= SWR_BARYCENTRIC_PER_PIXEL_MASK;
1511       if (ctx->fs->info.base.uses_persp_centroid ||
1512           ctx->fs->info.base.uses_linear_centroid)
1513          barycentricsMask |= SWR_BARYCENTRIC_CENTROID_MASK;
1514       if (ctx->fs->info.base.uses_persp_sample ||
1515           ctx->fs->info.base.uses_linear_sample)
1516          barycentricsMask |= SWR_BARYCENTRIC_PER_SAMPLE_MASK;
1517 #else
1518       for (unsigned i = 0; i < ctx->fs->info.base.num_inputs; i++) {
1519          switch (ctx->fs->info.base.input_interpolate_loc[i]) {
1520          case TGSI_INTERPOLATE_LOC_CENTER:
1521             barycentricsMask |= SWR_BARYCENTRIC_PER_PIXEL_MASK;
1522             break;
1523          case TGSI_INTERPOLATE_LOC_CENTROID:
1524             barycentricsMask |= SWR_BARYCENTRIC_CENTROID_MASK;
1525             break;
1526          case TGSI_INTERPOLATE_LOC_SAMPLE:
1527             barycentricsMask |= SWR_BARYCENTRIC_PER_SAMPLE_MASK;
1528             break;
1529          }
1530       }
1531 #endif
1532       psState.barycentricsMask = barycentricsMask;
1533       psState.usesUAV = false; // XXX
1534       psState.forceEarlyZ = false;
1535       ctx->api.pfnSwrSetPixelShaderState(ctx->swrContext, &psState);
1536 
1537       /* JIT sampler state */
1538       if (ctx->dirty & (SWR_NEW_SAMPLER |
1539                         SWR_NEW_FS)) {
1540          swr_update_sampler_state(ctx,
1541                                   PIPE_SHADER_FRAGMENT,
1542                                   key.nr_samplers,
1543                                   ctx->swrDC.samplersFS);
1544       }
1545 
1546       /* JIT sampler view state */
1547       if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW |
1548                         SWR_NEW_FRAMEBUFFER |
1549                         SWR_NEW_FS)) {
1550          swr_update_texture_state(ctx,
1551                                   PIPE_SHADER_FRAGMENT,
1552                                   key.nr_sampler_views,
1553                                   ctx->swrDC.texturesFS);
1554       }
1555    }
1556 
1557 
1558    /* VertexShader Constants */
1559    if (ctx->dirty & SWR_NEW_VSCONSTANTS) {
1560       swr_update_constants(ctx, PIPE_SHADER_VERTEX);
1561    }
1562 
1563    /* FragmentShader Constants */
1564    if (ctx->dirty & SWR_NEW_FSCONSTANTS) {
1565       swr_update_constants(ctx, PIPE_SHADER_FRAGMENT);
1566    }
1567 
1568    /* GeometryShader Constants */
1569    if (ctx->dirty & SWR_NEW_GSCONSTANTS) {
1570       swr_update_constants(ctx, PIPE_SHADER_GEOMETRY);
1571    }
1572 
1573    /* Depth/stencil state */
1574    if (ctx->dirty & (SWR_NEW_DEPTH_STENCIL_ALPHA | SWR_NEW_FRAMEBUFFER)) {
1575       struct pipe_depth_state *depth = &(ctx->depth_stencil->depth);
1576       struct pipe_stencil_state *stencil = ctx->depth_stencil->stencil;
1577       SWR_DEPTH_STENCIL_STATE depthStencilState = {{0}};
1578       SWR_DEPTH_BOUNDS_STATE depthBoundsState = {0};
1579 
1580       /* XXX, incomplete.  Need to flesh out stencil & alpha test state
1581       struct pipe_stencil_state *front_stencil =
1582       ctx->depth_stencil.stencil[0];
1583       struct pipe_stencil_state *back_stencil = ctx->depth_stencil.stencil[1];
1584       struct pipe_alpha_state alpha;
1585       */
1586       if (stencil[0].enabled) {
1587          depthStencilState.stencilWriteEnable = 1;
1588          depthStencilState.stencilTestEnable = 1;
1589          depthStencilState.stencilTestFunc =
1590             swr_convert_depth_func(stencil[0].func);
1591 
1592          depthStencilState.stencilPassDepthPassOp =
1593             swr_convert_stencil_op(stencil[0].zpass_op);
1594          depthStencilState.stencilPassDepthFailOp =
1595             swr_convert_stencil_op(stencil[0].zfail_op);
1596          depthStencilState.stencilFailOp =
1597             swr_convert_stencil_op(stencil[0].fail_op);
1598          depthStencilState.stencilWriteMask = stencil[0].writemask;
1599          depthStencilState.stencilTestMask = stencil[0].valuemask;
1600          depthStencilState.stencilRefValue = ctx->stencil_ref.ref_value[0];
1601       }
1602       if (stencil[1].enabled) {
1603          depthStencilState.doubleSidedStencilTestEnable = 1;
1604 
1605          depthStencilState.backfaceStencilTestFunc =
1606             swr_convert_depth_func(stencil[1].func);
1607 
1608          depthStencilState.backfaceStencilPassDepthPassOp =
1609             swr_convert_stencil_op(stencil[1].zpass_op);
1610          depthStencilState.backfaceStencilPassDepthFailOp =
1611             swr_convert_stencil_op(stencil[1].zfail_op);
1612          depthStencilState.backfaceStencilFailOp =
1613             swr_convert_stencil_op(stencil[1].fail_op);
1614          depthStencilState.backfaceStencilWriteMask = stencil[1].writemask;
1615          depthStencilState.backfaceStencilTestMask = stencil[1].valuemask;
1616 
1617          depthStencilState.backfaceStencilRefValue =
1618             ctx->stencil_ref.ref_value[1];
1619       }
1620 
1621       depthStencilState.depthTestEnable = depth->enabled;
1622       depthStencilState.depthTestFunc = swr_convert_depth_func(depth->func);
1623       depthStencilState.depthWriteEnable = depth->writemask;
1624       ctx->api.pfnSwrSetDepthStencilState(ctx->swrContext, &depthStencilState);
1625 
1626       depthBoundsState.depthBoundsTestEnable = depth->bounds_test;
1627       depthBoundsState.depthBoundsTestMinValue = depth->bounds_min;
1628       depthBoundsState.depthBoundsTestMaxValue = depth->bounds_max;
1629       ctx->api.pfnSwrSetDepthBoundsState(ctx->swrContext, &depthBoundsState);
1630    }
1631 
1632    /* Blend State */
1633    if (ctx->dirty & (SWR_NEW_BLEND |
1634                      SWR_NEW_RASTERIZER |
1635                      SWR_NEW_FRAMEBUFFER |
1636                      SWR_NEW_DEPTH_STENCIL_ALPHA)) {
1637       struct pipe_framebuffer_state *fb = &ctx->framebuffer;
1638 
1639       SWR_BLEND_STATE blendState;
1640       memcpy(&blendState, &ctx->blend->blendState, sizeof(blendState));
1641       blendState.constantColor[0] = ctx->blend_color.color[0];
1642       blendState.constantColor[1] = ctx->blend_color.color[1];
1643       blendState.constantColor[2] = ctx->blend_color.color[2];
1644       blendState.constantColor[3] = ctx->blend_color.color[3];
1645       blendState.alphaTestReference =
1646          *((uint32_t*)&ctx->depth_stencil->alpha.ref_value);
1647 
1648       blendState.sampleMask = ctx->sample_mask;
1649       blendState.sampleCount = GetSampleCount(fb->samples);
1650 
1651       /* If there are no color buffers bound, disable writes on RT0
1652        * and skip loop */
1653       if (fb->nr_cbufs == 0) {
1654          blendState.renderTarget[0].writeDisableRed = 1;
1655          blendState.renderTarget[0].writeDisableGreen = 1;
1656          blendState.renderTarget[0].writeDisableBlue = 1;
1657          blendState.renderTarget[0].writeDisableAlpha = 1;
1658          ctx->api.pfnSwrSetBlendFunc(ctx->swrContext, 0, NULL);
1659       }
1660       else
1661          for (int target = 0;
1662                target < std::min(SWR_NUM_RENDERTARGETS,
1663                                  PIPE_MAX_COLOR_BUFS);
1664                target++) {
1665             if (!fb->cbufs[target])
1666                continue;
1667 
1668             struct swr_resource *colorBuffer =
1669                swr_resource(fb->cbufs[target]->texture);
1670 
1671             BLEND_COMPILE_STATE compileState;
1672             memset(&compileState, 0, sizeof(compileState));
1673             compileState.format = colorBuffer->swr.format;
1674             memcpy(&compileState.blendState,
1675                    &ctx->blend->compileState[target],
1676                    sizeof(compileState.blendState));
1677 
1678             const SWR_FORMAT_INFO& info = GetFormatInfo(compileState.format);
1679             if (compileState.blendState.logicOpEnable &&
1680                 ((info.type[0] == SWR_TYPE_FLOAT) || info.isSRGB)) {
1681                compileState.blendState.logicOpEnable = false;
1682             }
1683 
1684             if (info.type[0] == SWR_TYPE_SINT || info.type[0] == SWR_TYPE_UINT)
1685                compileState.blendState.blendEnable = false;
1686 
1687             if (compileState.blendState.blendEnable == false &&
1688                 compileState.blendState.logicOpEnable == false &&
1689                 ctx->depth_stencil->alpha.enabled == 0) {
1690                ctx->api.pfnSwrSetBlendFunc(ctx->swrContext, target, NULL);
1691                continue;
1692             }
1693 
1694             compileState.desc.alphaTestEnable =
1695                ctx->depth_stencil->alpha.enabled;
1696             compileState.desc.independentAlphaBlendEnable =
1697                (compileState.blendState.sourceBlendFactor !=
1698                 compileState.blendState.sourceAlphaBlendFactor) ||
1699                (compileState.blendState.destBlendFactor !=
1700                 compileState.blendState.destAlphaBlendFactor) ||
1701                (compileState.blendState.colorBlendFunc !=
1702                 compileState.blendState.alphaBlendFunc);
1703             compileState.desc.alphaToCoverageEnable =
1704                ctx->blend->pipe.alpha_to_coverage;
1705             compileState.desc.sampleMaskEnable = (blendState.sampleMask != 0);
1706             compileState.desc.numSamples = fb->samples;
1707 
1708             compileState.alphaTestFunction =
1709                swr_convert_depth_func(ctx->depth_stencil->alpha.func);
1710             compileState.alphaTestFormat = ALPHA_TEST_FLOAT32; // xxx
1711 
1712             compileState.Canonicalize();
1713 
1714             PFN_BLEND_JIT_FUNC func = NULL;
1715             auto search = ctx->blendJIT->find(compileState);
1716             if (search != ctx->blendJIT->end()) {
1717                func = search->second;
1718             } else {
1719                HANDLE hJitMgr = screen->hJitMgr;
1720                func = JitCompileBlend(hJitMgr, compileState);
1721                debug_printf("BLEND shader %p\n", func);
1722                assert(func && "Error: BlendShader = NULL");
1723 
1724                ctx->blendJIT->insert(std::make_pair(compileState, func));
1725             }
1726             ctx->api.pfnSwrSetBlendFunc(ctx->swrContext, target, func);
1727          }
1728 
1729       ctx->api.pfnSwrSetBlendState(ctx->swrContext, &blendState);
1730    }
1731 
1732    if (ctx->dirty & SWR_NEW_STIPPLE) {
1733       swr_update_poly_stipple(ctx);
1734    }
1735 
1736    if (ctx->dirty & (SWR_NEW_VS | SWR_NEW_SO | SWR_NEW_RASTERIZER)) {
1737       ctx->vs->soState.rasterizerDisable =
1738          ctx->rasterizer->rasterizer_discard;
1739       ctx->api.pfnSwrSetSoState(ctx->swrContext, &ctx->vs->soState);
1740 
1741       pipe_stream_output_info *stream_output = &ctx->vs->pipe.stream_output;
1742 
1743       for (uint32_t i = 0; i < ctx->num_so_targets; i++) {
1744          SWR_STREAMOUT_BUFFER buffer = {0};
1745          if (!ctx->so_targets[i])
1746             continue;
1747          buffer.enable = true;
1748          buffer.pBuffer =
1749             (uint32_t *)(swr_resource_data(ctx->so_targets[i]->buffer) +
1750                          ctx->so_targets[i]->buffer_offset);
1751          buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2;
1752          buffer.pitch = stream_output->stride[i];
1753          buffer.streamOffset = 0;
1754 
1755          ctx->api.pfnSwrSetSoBuffers(ctx->swrContext, &buffer, i);
1756       }
1757    }
1758 
1759    if (ctx->dirty & (SWR_NEW_CLIP | SWR_NEW_RASTERIZER | SWR_NEW_VS)) {
1760       // shader exporting clip distances overrides all user clip planes
1761       if (ctx->rasterizer->clip_plane_enable &&
1762           !ctx->vs->info.base.num_written_clipdistance)
1763       {
1764          swr_draw_context *pDC = &ctx->swrDC;
1765          memcpy(pDC->userClipPlanes,
1766                 ctx->clip.ucp,
1767                 sizeof(pDC->userClipPlanes));
1768       }
1769    }
1770 
1771    // set up backend state
1772    SWR_BACKEND_STATE backendState = {0};
1773    if (ctx->gs) {
1774       backendState.numAttributes = ctx->gs->info.base.num_outputs - 1;
1775    } else {
1776       backendState.numAttributes = ctx->vs->info.base.num_outputs - 1;
1777       if (ctx->fs->info.base.uses_primid) {
1778          backendState.numAttributes++;
1779          backendState.swizzleEnable = true;
1780          for (unsigned i = 0; i < sizeof(backendState.numComponents); i++) {
1781             backendState.swizzleMap[i].sourceAttrib = i;
1782          }
1783          backendState.swizzleMap[ctx->vs->info.base.num_outputs - 1].constantSource =
1784             SWR_CONSTANT_SOURCE_PRIM_ID;
1785          backendState.swizzleMap[ctx->vs->info.base.num_outputs - 1].componentOverrideMask = 1;
1786       }
1787    }
1788    if (ctx->rasterizer->sprite_coord_enable)
1789       backendState.numAttributes++;
1790 
1791    backendState.numAttributes = std::min((size_t)backendState.numAttributes,
1792                                          sizeof(backendState.numComponents));
1793    for (unsigned i = 0; i < backendState.numAttributes; i++)
1794       backendState.numComponents[i] = 4;
1795    backendState.constantInterpolationMask = ctx->fs->constantMask |
1796       (ctx->rasterizer->flatshade ? ctx->fs->flatConstantMask : 0);
1797    backendState.pointSpriteTexCoordMask = ctx->fs->pointSpriteMask;
1798 
1799    struct tgsi_shader_info *pLastFE =
1800       ctx->gs ?
1801       &ctx->gs->info.base :
1802       &ctx->vs->info.base;
1803    backendState.readRenderTargetArrayIndex = pLastFE->writes_layer;
1804    backendState.readViewportArrayIndex = pLastFE->writes_viewport_index;
1805    backendState.vertexAttribOffset = VERTEX_ATTRIB_START_SLOT; // TODO: optimize
1806 
1807    backendState.clipDistanceMask =
1808       ctx->vs->info.base.num_written_clipdistance ?
1809       ctx->vs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable :
1810       ctx->rasterizer->clip_plane_enable;
1811 
1812    backendState.cullDistanceMask =
1813       ctx->vs->info.base.culldist_writemask << ctx->vs->info.base.num_written_clipdistance;
1814 
1815    // Assume old layout of SGV, POSITION, CLIPCULL, ATTRIB
1816    backendState.vertexClipCullOffset = backendState.vertexAttribOffset - 2;
1817 
1818    ctx->api.pfnSwrSetBackendState(ctx->swrContext, &backendState);
1819 
1820    /* Ensure that any in-progress attachment change StoreTiles finish */
1821    if (swr_is_fence_pending(screen->flush_fence))
1822       swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
1823 
1824    /* Finally, update the in-use status of all resources involved in draw */
1825    swr_update_resource_status(pipe, p_draw_info);
1826 
1827    ctx->dirty = post_update_dirty_flags;
1828 }
1829 
1830 
1831 static struct pipe_stream_output_target *
swr_create_so_target(struct pipe_context * pipe,struct pipe_resource * buffer,unsigned buffer_offset,unsigned buffer_size)1832 swr_create_so_target(struct pipe_context *pipe,
1833                      struct pipe_resource *buffer,
1834                      unsigned buffer_offset,
1835                      unsigned buffer_size)
1836 {
1837    struct pipe_stream_output_target *target;
1838 
1839    target = CALLOC_STRUCT(pipe_stream_output_target);
1840    if (!target)
1841       return NULL;
1842 
1843    target->context = pipe;
1844    target->reference.count = 1;
1845    pipe_resource_reference(&target->buffer, buffer);
1846    target->buffer_offset = buffer_offset;
1847    target->buffer_size = buffer_size;
1848    return target;
1849 }
1850 
1851 static void
swr_destroy_so_target(struct pipe_context * pipe,struct pipe_stream_output_target * target)1852 swr_destroy_so_target(struct pipe_context *pipe,
1853                       struct pipe_stream_output_target *target)
1854 {
1855    pipe_resource_reference(&target->buffer, NULL);
1856    FREE(target);
1857 }
1858 
1859 static void
swr_set_so_targets(struct pipe_context * pipe,unsigned num_targets,struct pipe_stream_output_target ** targets,const unsigned * offsets)1860 swr_set_so_targets(struct pipe_context *pipe,
1861                    unsigned num_targets,
1862                    struct pipe_stream_output_target **targets,
1863                    const unsigned *offsets)
1864 {
1865    struct swr_context *swr = swr_context(pipe);
1866    uint32_t i;
1867 
1868    assert(num_targets <= MAX_SO_STREAMS);
1869 
1870    for (i = 0; i < num_targets; i++) {
1871       pipe_so_target_reference(
1872          (struct pipe_stream_output_target **)&swr->so_targets[i],
1873          targets[i]);
1874    }
1875 
1876    for (/* fall-through */; i < swr->num_so_targets; i++) {
1877       pipe_so_target_reference(
1878          (struct pipe_stream_output_target **)&swr->so_targets[i], NULL);
1879    }
1880 
1881    swr->num_so_targets = num_targets;
1882 
1883    swr->dirty |= SWR_NEW_SO;
1884 }
1885 
1886 
1887 void
swr_state_init(struct pipe_context * pipe)1888 swr_state_init(struct pipe_context *pipe)
1889 {
1890    pipe->create_blend_state = swr_create_blend_state;
1891    pipe->bind_blend_state = swr_bind_blend_state;
1892    pipe->delete_blend_state = swr_delete_blend_state;
1893 
1894    pipe->create_depth_stencil_alpha_state = swr_create_depth_stencil_state;
1895    pipe->bind_depth_stencil_alpha_state = swr_bind_depth_stencil_state;
1896    pipe->delete_depth_stencil_alpha_state = swr_delete_depth_stencil_state;
1897 
1898    pipe->create_rasterizer_state = swr_create_rasterizer_state;
1899    pipe->bind_rasterizer_state = swr_bind_rasterizer_state;
1900    pipe->delete_rasterizer_state = swr_delete_rasterizer_state;
1901 
1902    pipe->create_sampler_state = swr_create_sampler_state;
1903    pipe->bind_sampler_states = swr_bind_sampler_states;
1904    pipe->delete_sampler_state = swr_delete_sampler_state;
1905 
1906    pipe->create_sampler_view = swr_create_sampler_view;
1907    pipe->set_sampler_views = swr_set_sampler_views;
1908    pipe->sampler_view_destroy = swr_sampler_view_destroy;
1909 
1910    pipe->create_vs_state = swr_create_vs_state;
1911    pipe->bind_vs_state = swr_bind_vs_state;
1912    pipe->delete_vs_state = swr_delete_vs_state;
1913 
1914    pipe->create_fs_state = swr_create_fs_state;
1915    pipe->bind_fs_state = swr_bind_fs_state;
1916    pipe->delete_fs_state = swr_delete_fs_state;
1917 
1918    pipe->create_gs_state = swr_create_gs_state;
1919    pipe->bind_gs_state = swr_bind_gs_state;
1920    pipe->delete_gs_state = swr_delete_gs_state;
1921 
1922    pipe->set_constant_buffer = swr_set_constant_buffer;
1923 
1924    pipe->create_vertex_elements_state = swr_create_vertex_elements_state;
1925    pipe->bind_vertex_elements_state = swr_bind_vertex_elements_state;
1926    pipe->delete_vertex_elements_state = swr_delete_vertex_elements_state;
1927 
1928    pipe->set_vertex_buffers = swr_set_vertex_buffers;
1929 
1930    pipe->set_polygon_stipple = swr_set_polygon_stipple;
1931    pipe->set_clip_state = swr_set_clip_state;
1932    pipe->set_scissor_states = swr_set_scissor_states;
1933    pipe->set_viewport_states = swr_set_viewport_states;
1934 
1935    pipe->set_framebuffer_state = swr_set_framebuffer_state;
1936 
1937    pipe->set_blend_color = swr_set_blend_color;
1938    pipe->set_stencil_ref = swr_set_stencil_ref;
1939 
1940    pipe->set_sample_mask = swr_set_sample_mask;
1941    pipe->get_sample_position = swr_get_sample_position;
1942 
1943    pipe->create_stream_output_target = swr_create_so_target;
1944    pipe->stream_output_target_destroy = swr_destroy_so_target;
1945    pipe->set_stream_output_targets = swr_set_so_targets;
1946 }
1947