1 /*
2  * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
3  * Copyright 2009 Marek Olšák <maraeo@gmail.com>
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * on the rights to use, copy, modify, merge, publish, distribute, sub
9  * license, and/or sell copies of the Software, and to permit persons to whom
10  * the Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23 
24 #include "draw/draw_context.h"
25 
26 #include "util/u_framebuffer.h"
27 #include "util/u_half.h"
28 #include "util/u_math.h"
29 #include "util/u_mm.h"
30 #include "util/u_memory.h"
31 #include "util/u_pack_color.h"
32 #include "util/u_transfer.h"
33 
34 #include "tgsi/tgsi_parse.h"
35 
36 #include "pipe/p_config.h"
37 
38 #include "r300_cb.h"
39 #include "r300_context.h"
40 #include "r300_emit.h"
41 #include "r300_reg.h"
42 #include "r300_screen.h"
43 #include "r300_screen_buffer.h"
44 #include "r300_state_inlines.h"
45 #include "r300_fs.h"
46 #include "r300_texture.h"
47 #include "r300_vs.h"
48 
/* r300_state: Functions used to initialize state context by translating
 * Gallium state objects into semi-native r300 state objects. */
51 
/* Bind a new CSO pointer to a state atom.
 *
 * If "cso" differs from the pointer currently held in "atom.state", the
 * pointer is stored and the atom is flagged with r300_mark_atom_dirty().
 * The expansion refers to a variable named "r300", which must be in scope
 * wherever the macro is used.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can safely be used as the body of an unbraced
 * if/else. "cso" is evaluated up to twice, so it must not have side
 * effects. */
#define UPDATE_STATE(cso, atom) \
    do { \
        if ((cso) != (atom).state) { \
            (atom).state = (cso); \
            r300_mark_atom_dirty(r300, &(atom)); \
        } \
    } while (0)
57 
blend_discard_if_src_alpha_0(unsigned srcRGB,unsigned srcA,unsigned dstRGB,unsigned dstA)58 static boolean blend_discard_if_src_alpha_0(unsigned srcRGB, unsigned srcA,
59                                             unsigned dstRGB, unsigned dstA)
60 {
61     /* If the blend equation is ADD or REVERSE_SUBTRACT,
62      * SRC_ALPHA == 0, and the following state is set, the colorbuffer
63      * will not be changed.
64      * Notice that the dst factors are the src factors inverted. */
65     return (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
66             srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
67             srcRGB == PIPE_BLENDFACTOR_ZERO) &&
68            (srcA == PIPE_BLENDFACTOR_SRC_COLOR ||
69             srcA == PIPE_BLENDFACTOR_SRC_ALPHA ||
70             srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
71             srcA == PIPE_BLENDFACTOR_ZERO) &&
72            (dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
73             dstRGB == PIPE_BLENDFACTOR_ONE) &&
74            (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
75             dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
76             dstA == PIPE_BLENDFACTOR_ONE);
77 }
78 
blend_discard_if_src_alpha_1(unsigned srcRGB,unsigned srcA,unsigned dstRGB,unsigned dstA)79 static boolean blend_discard_if_src_alpha_1(unsigned srcRGB, unsigned srcA,
80                                             unsigned dstRGB, unsigned dstA)
81 {
82     /* If the blend equation is ADD or REVERSE_SUBTRACT,
83      * SRC_ALPHA == 1, and the following state is set, the colorbuffer
84      * will not be changed.
85      * Notice that the dst factors are the src factors inverted. */
86     return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
87             srcRGB == PIPE_BLENDFACTOR_ZERO) &&
88            (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
89             srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
90             srcA == PIPE_BLENDFACTOR_ZERO) &&
91            (dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
92             dstRGB == PIPE_BLENDFACTOR_ONE) &&
93            (dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
94             dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
95             dstA == PIPE_BLENDFACTOR_ONE);
96 }
97 
blend_discard_if_src_color_0(unsigned srcRGB,unsigned srcA,unsigned dstRGB,unsigned dstA)98 static boolean blend_discard_if_src_color_0(unsigned srcRGB, unsigned srcA,
99                                             unsigned dstRGB, unsigned dstA)
100 {
101     /* If the blend equation is ADD or REVERSE_SUBTRACT,
102      * SRC_COLOR == (0,0,0), and the following state is set, the colorbuffer
103      * will not be changed.
104      * Notice that the dst factors are the src factors inverted. */
105     return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
106             srcRGB == PIPE_BLENDFACTOR_ZERO) &&
107            (srcA == PIPE_BLENDFACTOR_ZERO) &&
108            (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
109             dstRGB == PIPE_BLENDFACTOR_ONE) &&
110            (dstA == PIPE_BLENDFACTOR_ONE);
111 }
112 
blend_discard_if_src_color_1(unsigned srcRGB,unsigned srcA,unsigned dstRGB,unsigned dstA)113 static boolean blend_discard_if_src_color_1(unsigned srcRGB, unsigned srcA,
114                                             unsigned dstRGB, unsigned dstA)
115 {
116     /* If the blend equation is ADD or REVERSE_SUBTRACT,
117      * SRC_COLOR == (1,1,1), and the following state is set, the colorbuffer
118      * will not be changed.
119      * Notice that the dst factors are the src factors inverted. */
120     return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
121             srcRGB == PIPE_BLENDFACTOR_ZERO) &&
122            (srcA == PIPE_BLENDFACTOR_ZERO) &&
123            (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
124             dstRGB == PIPE_BLENDFACTOR_ONE) &&
125            (dstA == PIPE_BLENDFACTOR_ONE);
126 }
127 
blend_discard_if_src_alpha_color_0(unsigned srcRGB,unsigned srcA,unsigned dstRGB,unsigned dstA)128 static boolean blend_discard_if_src_alpha_color_0(unsigned srcRGB, unsigned srcA,
129                                                   unsigned dstRGB, unsigned dstA)
130 {
131     /* If the blend equation is ADD or REVERSE_SUBTRACT,
132      * SRC_ALPHA_COLOR == (0,0,0,0), and the following state is set,
133      * the colorbuffer will not be changed.
134      * Notice that the dst factors are the src factors inverted. */
135     return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
136             srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
137             srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
138             srcRGB == PIPE_BLENDFACTOR_ZERO) &&
139            (srcA == PIPE_BLENDFACTOR_SRC_COLOR ||
140             srcA == PIPE_BLENDFACTOR_SRC_ALPHA ||
141             srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
142             srcA == PIPE_BLENDFACTOR_ZERO) &&
143            (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
144             dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
145             dstRGB == PIPE_BLENDFACTOR_ONE) &&
146            (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
147             dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
148             dstA == PIPE_BLENDFACTOR_ONE);
149 }
150 
blend_discard_if_src_alpha_color_1(unsigned srcRGB,unsigned srcA,unsigned dstRGB,unsigned dstA)151 static boolean blend_discard_if_src_alpha_color_1(unsigned srcRGB, unsigned srcA,
152                                                   unsigned dstRGB, unsigned dstA)
153 {
154     /* If the blend equation is ADD or REVERSE_SUBTRACT,
155      * SRC_ALPHA_COLOR == (1,1,1,1), and the following state is set,
156      * the colorbuffer will not be changed.
157      * Notice that the dst factors are the src factors inverted. */
158     return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
159             srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
160             srcRGB == PIPE_BLENDFACTOR_ZERO) &&
161            (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
162             srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
163             srcA == PIPE_BLENDFACTOR_ZERO) &&
164            (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
165             dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
166             dstRGB == PIPE_BLENDFACTOR_ONE) &&
167            (dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
168             dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
169             dstA == PIPE_BLENDFACTOR_ONE);
170 }
171 
172 /* The hardware colormask is clunky a must be swizzled depending on the format.
173  * This was figured out by trial-and-error. */
bgra_cmask(unsigned mask)174 static unsigned bgra_cmask(unsigned mask)
175 {
176     return ((mask & PIPE_MASK_R) << 2) |
177            ((mask & PIPE_MASK_B) >> 2) |
178            (mask & (PIPE_MASK_G | PIPE_MASK_A));
179 }
180 
rgba_cmask(unsigned mask)181 static unsigned rgba_cmask(unsigned mask)
182 {
183     return mask & PIPE_MASK_RGBA;
184 }
185 
rrrr_cmask(unsigned mask)186 static unsigned rrrr_cmask(unsigned mask)
187 {
188     return (mask & PIPE_MASK_R) |
189            ((mask & PIPE_MASK_R) << 1) |
190            ((mask & PIPE_MASK_R) << 2) |
191            ((mask & PIPE_MASK_R) << 3);
192 }
193 
aaaa_cmask(unsigned mask)194 static unsigned aaaa_cmask(unsigned mask)
195 {
196     return ((mask & PIPE_MASK_A) >> 3) |
197            ((mask & PIPE_MASK_A) >> 2) |
198            ((mask & PIPE_MASK_A) >> 1) |
199            (mask & PIPE_MASK_A);
200 }
201 
grrg_cmask(unsigned mask)202 static unsigned grrg_cmask(unsigned mask)
203 {
204     return ((mask & PIPE_MASK_R) << 1) |
205            ((mask & PIPE_MASK_R) << 2) |
206            ((mask & PIPE_MASK_G) >> 1) |
207            ((mask & PIPE_MASK_G) << 2);
208 }
209 
arra_cmask(unsigned mask)210 static unsigned arra_cmask(unsigned mask)
211 {
212     return ((mask & PIPE_MASK_R) << 1) |
213            ((mask & PIPE_MASK_R) << 2) |
214            ((mask & PIPE_MASK_A) >> 3) |
215            (mask & PIPE_MASK_A);
216 }
217 
218 /* Create a new blend state based on the CSO blend state.
219  *
220  * This encompasses alpha blending, logic/raster ops, and blend dithering. */
r300_create_blend_state(struct pipe_context * pipe,const struct pipe_blend_state * state)221 static void* r300_create_blend_state(struct pipe_context* pipe,
222                                      const struct pipe_blend_state* state)
223 {
224     struct r300_screen* r300screen = r300_screen(pipe->screen);
225     struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state);
226     uint32_t blend_control = 0;       /* R300_RB3D_CBLEND: 0x4e04 */
227     uint32_t blend_control_noclamp = 0;    /* R300_RB3D_CBLEND: 0x4e04 */
228     uint32_t alpha_blend_control = 0; /* R300_RB3D_ABLEND: 0x4e08 */
229     uint32_t alpha_blend_control_noclamp = 0; /* R300_RB3D_ABLEND: 0x4e08 */
230     uint32_t rop = 0;                 /* R300_RB3D_ROPCNTL: 0x4e18 */
231     uint32_t dither = 0;              /* R300_RB3D_DITHER_CTL: 0x4e50 */
232     int i;
233     CB_LOCALS;
234 
235     blend->state = *state;
236 
237     if (state->rt[0].blend_enable)
238     {
239         unsigned eqRGB = state->rt[0].rgb_func;
240         unsigned srcRGB = state->rt[0].rgb_src_factor;
241         unsigned dstRGB = state->rt[0].rgb_dst_factor;
242 
243         unsigned eqA = state->rt[0].alpha_func;
244         unsigned srcA = state->rt[0].alpha_src_factor;
245         unsigned dstA = state->rt[0].alpha_dst_factor;
246 
247         /* despite the name, ALPHA_BLEND_ENABLE has nothing to do with alpha,
248          * this is just the crappy D3D naming */
249         blend_control = blend_control_noclamp =
250             R300_ALPHA_BLEND_ENABLE |
251             ( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) |
252             ( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT);
253         blend_control |=
254             r300_translate_blend_function(eqRGB, TRUE);
255         blend_control_noclamp |=
256             r300_translate_blend_function(eqRGB, FALSE);
257 
258         /* Optimization: some operations do not require the destination color.
259          *
260          * When SRC_ALPHA_SATURATE is used, colorbuffer reads must be enabled,
261          * otherwise blending gives incorrect results. It seems to be
262          * a hardware bug. */
263         if (eqRGB == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MIN ||
264             eqRGB == PIPE_BLEND_MAX || eqA == PIPE_BLEND_MAX ||
265             dstRGB != PIPE_BLENDFACTOR_ZERO ||
266             dstA != PIPE_BLENDFACTOR_ZERO ||
267             srcRGB == PIPE_BLENDFACTOR_DST_COLOR ||
268             srcRGB == PIPE_BLENDFACTOR_DST_ALPHA ||
269             srcRGB == PIPE_BLENDFACTOR_INV_DST_COLOR ||
270             srcRGB == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
271             srcA == PIPE_BLENDFACTOR_DST_COLOR ||
272             srcA == PIPE_BLENDFACTOR_DST_ALPHA ||
273             srcA == PIPE_BLENDFACTOR_INV_DST_COLOR ||
274             srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
275             srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) {
276             /* Enable reading from the colorbuffer. */
277             blend_control |= R300_READ_ENABLE;
278             blend_control_noclamp |= R300_READ_ENABLE;
279 
280             if (r300screen->caps.is_r500) {
281                 /* Optimization: Depending on incoming pixels, we can
282                  * conditionally disable the reading in hardware... */
283                 if (eqRGB != PIPE_BLEND_MIN && eqA != PIPE_BLEND_MIN &&
284                     eqRGB != PIPE_BLEND_MAX && eqA != PIPE_BLEND_MAX) {
285                     /* Disable reading if SRC_ALPHA == 0. */
286                     if ((dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
287                          dstRGB == PIPE_BLENDFACTOR_ZERO) &&
288                         (dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
289                          dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
290                          dstA == PIPE_BLENDFACTOR_ZERO) &&
291                         (srcRGB != PIPE_BLENDFACTOR_DST_COLOR &&
292                          srcRGB != PIPE_BLENDFACTOR_DST_ALPHA &&
293                          srcRGB != PIPE_BLENDFACTOR_INV_DST_COLOR &&
294                          srcRGB != PIPE_BLENDFACTOR_INV_DST_ALPHA)) {
295                          blend_control |= R500_SRC_ALPHA_0_NO_READ;
296                     }
297 
298                     /* Disable reading if SRC_ALPHA == 1. */
299                     if ((dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
300                          dstRGB == PIPE_BLENDFACTOR_ZERO) &&
301                         (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
302                          dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
303                          dstA == PIPE_BLENDFACTOR_ZERO) &&
304                         (srcRGB != PIPE_BLENDFACTOR_DST_COLOR &&
305                          srcRGB != PIPE_BLENDFACTOR_DST_ALPHA &&
306                          srcRGB != PIPE_BLENDFACTOR_INV_DST_COLOR &&
307                          srcRGB != PIPE_BLENDFACTOR_INV_DST_ALPHA)) {
308                          blend_control |= R500_SRC_ALPHA_1_NO_READ;
309                     }
310                 }
311             }
312         }
313 
314         /* Optimization: discard pixels which don't change the colorbuffer.
315          *
316          * The code below is non-trivial and some math is involved.
317          *
318          * Discarding pixels must be disabled when FP16 AA is enabled.
319          * This is a hardware bug. Also, this implementation wouldn't work
320          * with FP blending enabled and equation clamping disabled.
321          *
322          * Equations other than ADD are rarely used and therefore won't be
323          * optimized. */
324         if ((eqRGB == PIPE_BLEND_ADD || eqRGB == PIPE_BLEND_REVERSE_SUBTRACT) &&
325             (eqA == PIPE_BLEND_ADD || eqA == PIPE_BLEND_REVERSE_SUBTRACT)) {
326             /* ADD: X+Y
327              * REVERSE_SUBTRACT: Y-X
328              *
329              * The idea is:
330              * If X = src*srcFactor = 0 and Y = dst*dstFactor = 1,
331              * then CB will not be changed.
332              *
333              * Given the srcFactor and dstFactor variables, we can derive
334              * what src and dst should be equal to and discard appropriate
335              * pixels.
336              */
337             if (blend_discard_if_src_alpha_0(srcRGB, srcA, dstRGB, dstA)) {
338                 blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0;
339             } else if (blend_discard_if_src_alpha_1(srcRGB, srcA,
340                                                     dstRGB, dstA)) {
341                 blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1;
342             } else if (blend_discard_if_src_color_0(srcRGB, srcA,
343                                                     dstRGB, dstA)) {
344                 blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_0;
345             } else if (blend_discard_if_src_color_1(srcRGB, srcA,
346                                                     dstRGB, dstA)) {
347                 blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_1;
348             } else if (blend_discard_if_src_alpha_color_0(srcRGB, srcA,
349                                                           dstRGB, dstA)) {
350                 blend_control |=
351                     R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0;
352             } else if (blend_discard_if_src_alpha_color_1(srcRGB, srcA,
353                                                           dstRGB, dstA)) {
354                 blend_control |=
355                     R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1;
356             }
357         }
358 
359         /* separate alpha */
360         if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
361             blend_control |= R300_SEPARATE_ALPHA_ENABLE;
362             blend_control_noclamp |= R300_SEPARATE_ALPHA_ENABLE;
363             alpha_blend_control = alpha_blend_control_noclamp =
364                 (r300_translate_blend_factor(srcA) << R300_SRC_BLEND_SHIFT) |
365                 (r300_translate_blend_factor(dstA) << R300_DST_BLEND_SHIFT);
366             alpha_blend_control |=
367                 r300_translate_blend_function(eqA, TRUE);
368             alpha_blend_control_noclamp |=
369                 r300_translate_blend_function(eqA, FALSE);
370         }
371     }
372 
373     /* PIPE_LOGICOP_* don't need to be translated, fortunately. */
374     if (state->logicop_enable) {
375         rop = R300_RB3D_ROPCNTL_ROP_ENABLE |
376                 (state->logicop_func) << R300_RB3D_ROPCNTL_ROP_SHIFT;
377     }
378 
379     /* Neither fglrx nor classic r300 ever set this, regardless of dithering
380      * state. Since it's an optional implementation detail, we can leave it
381      * out and never dither.
382      *
383      * This could be revisited if we ever get quality or conformance hints.
384      *
385     if (state->dither) {
386         dither = R300_RB3D_DITHER_CTL_DITHER_MODE_LUT |
387                         R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT;
388     }
389     */
390 
391     /* Build a command buffer. */
392     {
393         unsigned (*func[COLORMASK_NUM_SWIZZLES])(unsigned) = {
394             bgra_cmask,
395             rgba_cmask,
396             rrrr_cmask,
397             aaaa_cmask,
398             grrg_cmask,
399             arra_cmask
400         };
401 
402         for (i = 0; i < COLORMASK_NUM_SWIZZLES; i++) {
403             BEGIN_CB(blend->cb_clamp[i], 8);
404             OUT_CB_REG(R300_RB3D_ROPCNTL, rop);
405             OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3);
406             OUT_CB(blend_control);
407             OUT_CB(alpha_blend_control);
408             OUT_CB(func[i](state->rt[0].colormask));
409             OUT_CB_REG(R300_RB3D_DITHER_CTL, dither);
410             END_CB;
411         }
412     }
413 
414     /* Build a command buffer. */
415     BEGIN_CB(blend->cb_noclamp, 8);
416     OUT_CB_REG(R300_RB3D_ROPCNTL, rop);
417     OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3);
418     OUT_CB(blend_control_noclamp);
419     OUT_CB(alpha_blend_control_noclamp);
420     OUT_CB(rgba_cmask(state->rt[0].colormask));
421     OUT_CB_REG(R300_RB3D_DITHER_CTL, dither);
422     END_CB;
423 
424     /* The same as above, but with no colorbuffer reads and writes. */
425     BEGIN_CB(blend->cb_no_readwrite, 8);
426     OUT_CB_REG(R300_RB3D_ROPCNTL, rop);
427     OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3);
428     OUT_CB(0);
429     OUT_CB(0);
430     OUT_CB(0);
431     OUT_CB_REG(R300_RB3D_DITHER_CTL, dither);
432     END_CB;
433 
434     return (void*)blend;
435 }
436 
437 /* Bind blend state. */
r300_bind_blend_state(struct pipe_context * pipe,void * state)438 static void r300_bind_blend_state(struct pipe_context* pipe,
439                                   void* state)
440 {
441     struct r300_context* r300 = r300_context(pipe);
442 
443     UPDATE_STATE(state, r300->blend_state);
444 }
445 
/* Free blend state.
 *
 * Releases the object with FREE(); the context argument is not used. */
static void r300_delete_blend_state(struct pipe_context* pipe,
                                    void* state)
{
    (void)pipe;

    FREE(state);
}
452 
/* Convert float to 10bit integer.
 *
 * Values in [0, 1] are scaled by 1023.9 and truncated, giving 0..1023.
 * Anything at or below zero (and NaN) yields 0; anything at or above one
 * yields 1023.
 *
 * The range check is done on the float, before the integer conversion:
 * converting a negative or very large float to unsigned is undefined
 * behaviour in C, and in practice made negative inputs saturate to 1023
 * instead of 0. */
static unsigned float_to_fixed10(float f)
{
    /* Written as a negated comparison so that NaN also lands here. */
    if (!(f > 0.0f)) {
        return 0;
    }

    if (f >= 1.0f) {
        return 1023;
    }

    /* 0 < f < 1, so the product is below 1023.9 and truncates to <= 1023. */
    return (unsigned)(f * 1023.9f);
}
458 
459 /* Set blend color.
460  * Setup both R300 and R500 registers, figure out later which one to write. */
r300_set_blend_color(struct pipe_context * pipe,const struct pipe_blend_color * color)461 static void r300_set_blend_color(struct pipe_context* pipe,
462                                  const struct pipe_blend_color* color)
463 {
464     struct r300_context* r300 = r300_context(pipe);
465     struct pipe_framebuffer_state *fb = r300->fb_state.state;
466     struct r300_blend_color_state *state =
467         (struct r300_blend_color_state*)r300->blend_color_state.state;
468     struct pipe_blend_color c;
469     enum pipe_format format = fb->nr_cbufs ? fb->cbufs[0]->format : 0;
470     float tmp;
471     CB_LOCALS;
472 
473     state->state = *color; /* Save it, so that we can reuse it in set_fb_state */
474     c = *color;
475 
476     /* The blend color is dependent on the colorbuffer format. */
477     if (fb->nr_cbufs) {
478         switch (format) {
479         case PIPE_FORMAT_R8_UNORM:
480         case PIPE_FORMAT_L8_UNORM:
481         case PIPE_FORMAT_I8_UNORM:
482             c.color[1] = c.color[0];
483             break;
484 
485         case PIPE_FORMAT_A8_UNORM:
486             c.color[1] = c.color[3];
487             break;
488 
489         case PIPE_FORMAT_R8G8_UNORM:
490             c.color[2] = c.color[1];
491             break;
492 
493         case PIPE_FORMAT_L8A8_UNORM:
494             c.color[2] = c.color[3];
495             break;
496 
497         case PIPE_FORMAT_R8G8B8A8_UNORM:
498         case PIPE_FORMAT_R8G8B8X8_UNORM:
499             tmp = c.color[0];
500             c.color[0] = c.color[2];
501             c.color[2] = tmp;
502             break;
503 
504         default:;
505         }
506     }
507 
508     if (r300->screen->caps.is_r500) {
509         BEGIN_CB(state->cb, 3);
510         OUT_CB_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2);
511 
512         switch (format) {
513         case PIPE_FORMAT_R16G16B16A16_FLOAT:
514             OUT_CB(util_float_to_half(c.color[2]) |
515                    (util_float_to_half(c.color[3]) << 16));
516             OUT_CB(util_float_to_half(c.color[0]) |
517                    (util_float_to_half(c.color[1]) << 16));
518             break;
519 
520         default:
521             OUT_CB(float_to_fixed10(c.color[0]) |
522                    (float_to_fixed10(c.color[3]) << 16));
523             OUT_CB(float_to_fixed10(c.color[2]) |
524                    (float_to_fixed10(c.color[1]) << 16));
525         }
526 
527         END_CB;
528     } else {
529         union util_color uc;
530         util_pack_color(c.color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
531 
532         BEGIN_CB(state->cb, 2);
533         OUT_CB_REG(R300_RB3D_BLEND_COLOR, uc.ui);
534         END_CB;
535     }
536 
537     r300_mark_atom_dirty(r300, &r300->blend_color_state);
538 }
539 
r300_set_clip_state(struct pipe_context * pipe,const struct pipe_clip_state * state)540 static void r300_set_clip_state(struct pipe_context* pipe,
541                                 const struct pipe_clip_state* state)
542 {
543     struct r300_context* r300 = r300_context(pipe);
544     struct r300_clip_state *clip =
545             (struct r300_clip_state*)r300->clip_state.state;
546     CB_LOCALS;
547 
548     if (r300->screen->caps.has_tcl) {
549         BEGIN_CB(clip->cb, r300->clip_state.size);
550         OUT_CB_REG(R300_VAP_PVS_VECTOR_INDX_REG,
551                    (r300->screen->caps.is_r500 ?
552                     R500_PVS_UCP_START : R300_PVS_UCP_START));
553         OUT_CB_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 6 * 4);
554         OUT_CB_TABLE(state->ucp, 6 * 4);
555         END_CB;
556 
557         r300_mark_atom_dirty(r300, &r300->clip_state);
558     } else {
559         draw_set_clip_state(r300->draw, state);
560     }
561 }
562 
/* Set the sample mask.
 *
 * Currently a no-op: the body does nothing, so the requested mask is
 * ignored. */
static void
r300_set_sample_mask(struct pipe_context *pipe,
                     unsigned sample_mask)
{
    (void)pipe;
    (void)sample_mask;
}
568 
569 
570 /* Create a new depth, stencil, and alpha state based on the CSO dsa state.
571  *
572  * This contains the depth buffer, stencil buffer, alpha test, and such.
573  * On the Radeon, depth and stencil buffer setup are intertwined, which is
574  * the reason for some of the strange-looking assignments across registers. */
575 static void*
r300_create_dsa_state(struct pipe_context * pipe,const struct pipe_depth_stencil_alpha_state * state)576         r300_create_dsa_state(struct pipe_context* pipe,
577                               const struct pipe_depth_stencil_alpha_state* state)
578 {
579     struct r300_capabilities *caps = &r300_screen(pipe->screen)->caps;
580     struct r300_dsa_state* dsa = CALLOC_STRUCT(r300_dsa_state);
581     CB_LOCALS;
582 
583     dsa->dsa = *state;
584 
585     /* Depth test setup. - separate write mask depth for decomp flush */
586     if (state->depth.writemask) {
587         dsa->z_buffer_control |= R300_Z_WRITE_ENABLE;
588     }
589 
590     if (state->depth.enabled) {
591         dsa->z_buffer_control |= R300_Z_ENABLE;
592 
593         dsa->z_stencil_control |=
594             (r300_translate_depth_stencil_function(state->depth.func) <<
595                 R300_Z_FUNC_SHIFT);
596     } else {
597         /* We must enable depth test, otherwise occlusion queries won't work. */
598         dsa->z_buffer_control |= R300_Z_ENABLE;
599         dsa->z_stencil_control |= R300_ZS_ALWAYS;
600     }
601 
602     /* Stencil buffer setup. */
603     if (state->stencil[0].enabled) {
604         dsa->z_buffer_control |= R300_STENCIL_ENABLE;
605         dsa->z_stencil_control |=
606             (r300_translate_depth_stencil_function(state->stencil[0].func) <<
607                 R300_S_FRONT_FUNC_SHIFT) |
608             (r300_translate_stencil_op(state->stencil[0].fail_op) <<
609                 R300_S_FRONT_SFAIL_OP_SHIFT) |
610             (r300_translate_stencil_op(state->stencil[0].zpass_op) <<
611                 R300_S_FRONT_ZPASS_OP_SHIFT) |
612             (r300_translate_stencil_op(state->stencil[0].zfail_op) <<
613                 R300_S_FRONT_ZFAIL_OP_SHIFT);
614 
615         dsa->stencil_ref_mask =
616                 (state->stencil[0].valuemask << R300_STENCILMASK_SHIFT) |
617                 (state->stencil[0].writemask << R300_STENCILWRITEMASK_SHIFT);
618 
619         if (state->stencil[1].enabled) {
620             dsa->two_sided = TRUE;
621 
622             dsa->z_buffer_control |= R300_STENCIL_FRONT_BACK;
623             dsa->z_stencil_control |=
624             (r300_translate_depth_stencil_function(state->stencil[1].func) <<
625                 R300_S_BACK_FUNC_SHIFT) |
626             (r300_translate_stencil_op(state->stencil[1].fail_op) <<
627                 R300_S_BACK_SFAIL_OP_SHIFT) |
628             (r300_translate_stencil_op(state->stencil[1].zpass_op) <<
629                 R300_S_BACK_ZPASS_OP_SHIFT) |
630             (r300_translate_stencil_op(state->stencil[1].zfail_op) <<
631                 R300_S_BACK_ZFAIL_OP_SHIFT);
632 
633             dsa->stencil_ref_bf =
634                 (state->stencil[1].valuemask << R300_STENCILMASK_SHIFT) |
635                 (state->stencil[1].writemask << R300_STENCILWRITEMASK_SHIFT);
636 
637             if (caps->is_r500) {
638                 dsa->z_buffer_control |= R500_STENCIL_REFMASK_FRONT_BACK;
639             } else {
640                 dsa->two_sided_stencil_ref =
641                   (state->stencil[0].valuemask != state->stencil[1].valuemask ||
642                    state->stencil[0].writemask != state->stencil[1].writemask);
643             }
644         }
645     }
646 
647     /* Alpha test setup. */
648     if (state->alpha.enabled) {
649         dsa->alpha_function =
650             r300_translate_alpha_function(state->alpha.func) |
651             R300_FG_ALPHA_FUNC_ENABLE;
652 
653         dsa->alpha_function |= float_to_ubyte(state->alpha.ref_value);
654         dsa->alpha_value = util_float_to_half(state->alpha.ref_value);
655 
656         if (caps->is_r500) {
657             dsa->alpha_function_fp16 = dsa->alpha_function |
658                                        R500_FG_ALPHA_FUNC_FP16_ENABLE;
659             dsa->alpha_function |= R500_FG_ALPHA_FUNC_8BIT;
660         }
661     }
662 
663     BEGIN_CB(&dsa->cb_begin, 10);
664     OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function);
665     OUT_CB_REG_SEQ(R300_ZB_CNTL, 3);
666     OUT_CB(dsa->z_buffer_control);
667     OUT_CB(dsa->z_stencil_control);
668     OUT_CB(dsa->stencil_ref_mask);
669     OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf);
670     OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value);
671     END_CB;
672 
673     BEGIN_CB(&dsa->cb_begin_fp16, 10);
674     OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function_fp16);
675     OUT_CB_REG_SEQ(R300_ZB_CNTL, 3);
676     OUT_CB(dsa->z_buffer_control);
677     OUT_CB(dsa->z_stencil_control);
678     OUT_CB(dsa->stencil_ref_mask);
679     OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf);
680     OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value);
681     END_CB;
682 
683     /* We must enable depth test, otherwise occlusion queries won't work.
684      * We setup a dummy zbuffer to silent the CS checker, see emit_fb_state. */
685     BEGIN_CB(dsa->cb_zb_no_readwrite, 10);
686     OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function);
687     OUT_CB_REG_SEQ(R300_ZB_CNTL, 3);
688     OUT_CB(R300_Z_ENABLE);
689     OUT_CB(R300_ZS_ALWAYS);
690     OUT_CB(0);
691     OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, 0);
692     OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value);
693     END_CB;
694 
695     BEGIN_CB(dsa->cb_fp16_zb_no_readwrite, 10);
696     OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function_fp16);
697     OUT_CB_REG_SEQ(R300_ZB_CNTL, 3);
698     OUT_CB(R300_Z_ENABLE);
699     OUT_CB(R300_ZS_ALWAYS);
700     OUT_CB(0);
701     OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, 0);
702     OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value);
703     END_CB;
704 
705     return (void*)dsa;
706 }
707 
r300_dsa_inject_stencilref(struct r300_context * r300)708 static void r300_dsa_inject_stencilref(struct r300_context *r300)
709 {
710     struct r300_dsa_state *dsa =
711             (struct r300_dsa_state*)r300->dsa_state.state;
712 
713     if (!dsa)
714         return;
715 
716     dsa->stencil_ref_mask =
717         (dsa->stencil_ref_mask & ~R300_STENCILREF_MASK) |
718         r300->stencil_ref.ref_value[0];
719     dsa->stencil_ref_bf =
720         (dsa->stencil_ref_bf & ~R300_STENCILREF_MASK) |
721         r300->stencil_ref.ref_value[1];
722 }
723 
724 /* Bind DSA state. */
r300_bind_dsa_state(struct pipe_context * pipe,void * state)725 static void r300_bind_dsa_state(struct pipe_context* pipe,
726                                 void* state)
727 {
728     struct r300_context* r300 = r300_context(pipe);
729 
730     if (!state) {
731         return;
732     }
733 
734     UPDATE_STATE(state, r300->dsa_state);
735 
736     r300_mark_atom_dirty(r300, &r300->hyperz_state); /* Will be updated before the emission. */
737     r300_dsa_inject_stencilref(r300);
738 }
739 
/* Free DSA state. Only the state object itself is released. */
static void r300_delete_dsa_state(struct pipe_context* pipe,
                                  void* state)
{
    (void)pipe; /* the context is not needed to release the object */

    FREE(state);
}
746 
r300_set_stencil_ref(struct pipe_context * pipe,const struct pipe_stencil_ref * sr)747 static void r300_set_stencil_ref(struct pipe_context* pipe,
748                                  const struct pipe_stencil_ref* sr)
749 {
750     struct r300_context* r300 = r300_context(pipe);
751 
752     r300->stencil_ref = *sr;
753 
754     r300_dsa_inject_stencilref(r300);
755     r300_mark_atom_dirty(r300, &r300->dsa_state);
756 }
757 
r300_tex_set_tiling_flags(struct r300_context * r300,struct r300_resource * tex,unsigned level)758 static void r300_tex_set_tiling_flags(struct r300_context *r300,
759                                       struct r300_resource *tex,
760                                       unsigned level)
761 {
762     /* Check if the macrotile flag needs to be changed.
763      * Skip changing the flags otherwise. */
764     if (tex->tex.macrotile[tex->surface_level] !=
765         tex->tex.macrotile[level]) {
766         r300->rws->buffer_set_tiling(tex->buf, r300->cs,
767                 tex->tex.microtile, tex->tex.macrotile[level],
768                 0, 0, 0, 0, 0,
769                 tex->tex.stride_in_bytes[0]);
770 
771         tex->surface_level = level;
772     }
773 }
774 
775 /* This switcheroo is needed just because of goddamned MACRO_SWITCH. */
r300_fb_set_tiling_flags(struct r300_context * r300,const struct pipe_framebuffer_state * state)776 static void r300_fb_set_tiling_flags(struct r300_context *r300,
777                                const struct pipe_framebuffer_state *state)
778 {
779     unsigned i;
780 
781     /* Set tiling flags for new surfaces. */
782     for (i = 0; i < state->nr_cbufs; i++) {
783         r300_tex_set_tiling_flags(r300,
784                                   r300_resource(state->cbufs[i]->texture),
785                                   state->cbufs[i]->u.tex.level);
786     }
787     if (state->zsbuf) {
788         r300_tex_set_tiling_flags(r300,
789                                   r300_resource(state->zsbuf->texture),
790                                   state->zsbuf->u.tex.level);
791     }
792 }
793 
r300_print_fb_surf_info(struct pipe_surface * surf,unsigned index,const char * binding)794 static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index,
795                                     const char *binding)
796 {
797     struct pipe_resource *tex = surf->texture;
798     struct r300_resource *rtex = r300_resource(tex);
799 
800     fprintf(stderr,
801             "r300:   %s[%i] Dim: %ix%i, Firstlayer: %i, "
802             "Lastlayer: %i, Level: %i, Format: %s\n"
803 
804             "r300:     TEX: Macro: %s, Micro: %s, "
805             "Dim: %ix%ix%i, LastLevel: %i, Format: %s\n",
806 
807             binding, index, surf->width, surf->height,
808             surf->u.tex.first_layer, surf->u.tex.last_layer, surf->u.tex.level,
809             util_format_short_name(surf->format),
810 
811             rtex->tex.macrotile[0] ? "YES" : " NO",
812             rtex->tex.microtile ? "YES" : " NO",
813             tex->width0, tex->height0, tex->depth0,
814             tex->last_level, util_format_short_name(surf->format));
815 }
816 
r300_mark_fb_state_dirty(struct r300_context * r300,enum r300_fb_state_change change)817 void r300_mark_fb_state_dirty(struct r300_context *r300,
818                               enum r300_fb_state_change change)
819 {
820     struct pipe_framebuffer_state *state = r300->fb_state.state;
821 
822     r300_mark_atom_dirty(r300, &r300->gpu_flush);
823     r300_mark_atom_dirty(r300, &r300->fb_state);
824 
825     /* What is marked as dirty depends on the enum r300_fb_state_change. */
826     if (change == R300_CHANGED_FB_STATE) {
827         r300_mark_atom_dirty(r300, &r300->aa_state);
828         r300_mark_atom_dirty(r300, &r300->dsa_state); /* for AlphaRef */
829         r300_set_blend_color(&r300->context, r300->blend_color_state.state);
830     }
831 
832     if (change == R300_CHANGED_FB_STATE ||
833         change == R300_CHANGED_HYPERZ_FLAG) {
834         r300_mark_atom_dirty(r300, &r300->hyperz_state);
835     }
836 
837     if (change == R300_CHANGED_FB_STATE ||
838         change == R300_CHANGED_MULTIWRITE) {
839         r300_mark_atom_dirty(r300, &r300->fb_state_pipelined);
840     }
841 
842     /* Now compute the fb_state atom size. */
843     r300->fb_state.size = 2 + (8 * state->nr_cbufs);
844 
845     if (r300->cbzb_clear) {
846         r300->fb_state.size += 10;
847     } else if (state->zsbuf) {
848         r300->fb_state.size += 10;
849         if (r300->hyperz_enabled)
850             r300->fb_state.size += 8;
851     } else if (state->nr_cbufs) {
852         r300->fb_state.size += 10;
853     }
854 
855     /* The size of the rest of atoms stays the same. */
856 }
857 
/* Bind a new framebuffer state.
 *
 * Refuses (with a message on stderr) framebuffers larger than the per-chip
 * maximum. Otherwise it deals with a zmask that is in use or a locked
 * zbuffer when the depth attachment changes, copies the new state into the
 * context, marks the affected atoms dirty, and derives the zbuffer bit depth
 * and the AA configuration from the new attachments. */
static void
r300_set_framebuffer_state(struct pipe_context* pipe,
                           const struct pipe_framebuffer_state* state)
{
    struct r300_context* r300 = r300_context(pipe);
    struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;
    /* NOTE: old_state is the context's own framebuffer state object, the same
     * one util_copy_framebuffer_state() writes to below, so it only describes
     * the previous binding until that call. */
    struct pipe_framebuffer_state *old_state = r300->fb_state.state;
    unsigned max_width, max_height, i;
    uint32_t zbuffer_bpp = 0;
    boolean unlock_zbuffer = FALSE;

    /* Maximum render target dimensions per chip family. */
    if (r300->screen->caps.is_r500) {
        max_width = max_height = 4096;
    } else if (r300->screen->caps.is_r400) {
        max_width = max_height = 4021;
    } else {
        max_width = max_height = 2560;
    }

    if (state->width > max_width || state->height > max_height) {
        fprintf(stderr, "r300: Implementation error: Render targets are too "
        "big in %s, refusing to bind framebuffer state!\n", __FUNCTION__);
        return;
    }

    if (old_state->zsbuf && r300->zmask_in_use && !r300->locked_zbuffer) {
        /* There is a zmask in use, what are we gonna do? */
        if (state->zsbuf) {
            if (!pipe_surface_equal(old_state->zsbuf, state->zsbuf)) {
                /* Decompress the currently bound zbuffer before we bind another one. */
                r300_decompress_zmask(r300);
                r300->hiz_in_use = FALSE;
            }
        } else {
            /* We don't bind another zbuffer, so lock the current one. */
            pipe_surface_reference(&r300->locked_zbuffer, old_state->zsbuf);
        }
    } else if (r300->locked_zbuffer) {
        /* We have a locked zbuffer now, what are we gonna do? */
        if (state->zsbuf) {
            if (!pipe_surface_equal(r300->locked_zbuffer, state->zsbuf)) {
                /* We are binding some other zbuffer, so decompress the locked one,
                 * it gets unlocked automatically. */
                r300_decompress_zmask_locked_unsafe(r300);
                r300->hiz_in_use = FALSE;
            } else {
                /* We are binding the locked zbuffer again, so unlock it. */
                unlock_zbuffer = TRUE;
            }
        }
    }
    /* Sanity check: a zmask may only remain in use if a zbuffer is being
     * bound or a locked zbuffer stays locked. */
    assert(state->zsbuf || (r300->locked_zbuffer && !unlock_zbuffer) || !r300->zmask_in_use);

    /* Need to reset clamping or colormask. */
    r300_mark_atom_dirty(r300, &r300->blend_state);

    /* Re-swizzle the blend color. */
    r300_set_blend_color(pipe, &((struct r300_blend_color_state*)r300->blend_color_state.state)->state);

    /* If zsbuf is set from NULL to non-NULL or vice versa.. */
    if (!!old_state->zsbuf != !!state->zsbuf) {
        r300_mark_atom_dirty(r300, &r300->dsa_state);
    }

    if (r300->screen->info.drm_minor < 12) {
       /* The tiling flags are dependent on the surface miplevel, unfortunately.
        * This workarounds a bad design decision in old kernels which were
        * rewriting tile fields in registers. */
        r300_fb_set_tiling_flags(r300, state);
    }

    /* From here on old_state no longer describes the previous binding. */
    util_copy_framebuffer_state(r300->fb_state.state, state);

    /* The unlock is deferred until after the new state has been copied. */
    if (unlock_zbuffer) {
        pipe_surface_reference(&r300->locked_zbuffer, NULL);
    }

    r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE);

    if (state->zsbuf) {
        /* Map the format's block size in bytes to a depth bit count;
         * any other block size leaves zbuffer_bpp at 0. */
        switch (util_format_get_blocksize(state->zsbuf->format)) {
        case 2:
            zbuffer_bpp = 16;
            break;
        case 4:
            zbuffer_bpp = 24;
            break;
        }

        /* Polygon offset depends on the zbuffer bit depth. */
        if (r300->zbuffer_bpp != zbuffer_bpp) {
            r300->zbuffer_bpp = zbuffer_bpp;

            if (r300->polygon_offset_enabled)
                r300_mark_atom_dirty(r300, &r300->rs_state);
        }
    }

    /* Set up AA config. Only the first colorbuffer's sample count is
     * consulted. */
    if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) {
        aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE;

        switch (state->cbufs[0]->texture->nr_samples) {
        case 2:
            aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2;
            break;
        case 3:
            aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3;
            break;
        case 4:
            aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4;
            break;
        case 6:
            aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6;
            break;
        }
    } else {
        aa->aa_config = 0;
    }

    /* Optional debug dump of every bound surface. */
    if (DBG_ON(r300, DBG_FB)) {
        fprintf(stderr, "r300: set_framebuffer_state:\n");
        for (i = 0; i < state->nr_cbufs; i++) {
            r300_print_fb_surf_info(state->cbufs[i], i, "CB");
        }
        if (state->zsbuf) {
            r300_print_fb_surf_info(state->zsbuf, 0, "ZB");
        }
    }
}
988 
989 /* Create fragment shader state. */
r300_create_fs_state(struct pipe_context * pipe,const struct pipe_shader_state * shader)990 static void* r300_create_fs_state(struct pipe_context* pipe,
991                                   const struct pipe_shader_state* shader)
992 {
993     struct r300_fragment_shader* fs = NULL;
994 
995     fs = (struct r300_fragment_shader*)CALLOC_STRUCT(r300_fragment_shader);
996 
997     /* Copy state directly into shader. */
998     fs->state = *shader;
999     fs->state.tokens = tgsi_dup_tokens(shader->tokens);
1000 
1001     return (void*)fs;
1002 }
1003 
r300_mark_fs_code_dirty(struct r300_context * r300)1004 void r300_mark_fs_code_dirty(struct r300_context *r300)
1005 {
1006     struct r300_fragment_shader* fs = r300_fs(r300);
1007 
1008     r300_mark_atom_dirty(r300, &r300->fs);
1009     r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state);
1010     r300_mark_atom_dirty(r300, &r300->fs_constants);
1011     r300->fs.size = fs->shader->cb_code_size;
1012 
1013     if (r300->screen->caps.is_r500) {
1014         r300->fs_rc_constant_state.size = fs->shader->rc_state_count * 7;
1015         r300->fs_constants.size = fs->shader->externals_count * 4 + 3;
1016     } else {
1017         r300->fs_rc_constant_state.size = fs->shader->rc_state_count * 5;
1018         r300->fs_constants.size = fs->shader->externals_count * 4 + 1;
1019     }
1020 
1021     ((struct r300_constant_buffer*)r300->fs_constants.state)->remap_table =
1022             fs->shader->code.constants_remap_table;
1023 }
1024 
1025 /* Bind fragment shader state. */
r300_bind_fs_state(struct pipe_context * pipe,void * shader)1026 static void r300_bind_fs_state(struct pipe_context* pipe, void* shader)
1027 {
1028     struct r300_context* r300 = r300_context(pipe);
1029     struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader;
1030 
1031     if (fs == NULL) {
1032         r300->fs.state = NULL;
1033         return;
1034     }
1035 
1036     r300->fs.state = fs;
1037     r300->fs_status = FRAGMENT_SHADER_DIRTY;
1038 
1039     r300_mark_atom_dirty(r300, &r300->rs_block_state); /* Will be updated before the emission. */
1040 }
1041 
1042 /* Delete fragment shader state. */
r300_delete_fs_state(struct pipe_context * pipe,void * shader)1043 static void r300_delete_fs_state(struct pipe_context* pipe, void* shader)
1044 {
1045     struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader;
1046     struct r300_fragment_shader_code *tmp, *ptr = fs->first;
1047 
1048     while (ptr) {
1049         tmp = ptr;
1050         ptr = ptr->next;
1051         rc_constants_destroy(&tmp->code.constants);
1052         FREE(tmp->cb_code);
1053         FREE(tmp);
1054     }
1055     FREE((void*)fs->state.tokens);
1056     FREE(shader);
1057 }
1058 
/* Polygon stipple hook. Intentionally a no-op: the stipple pattern is
 * accepted and ignored. */
static void r300_set_polygon_stipple(struct pipe_context* pipe,
                                     const struct pipe_poly_stipple* state)
{
    /* XXX no idea how to set this up, but not terribly important */
    (void)pipe;
    (void)state;
}
1064 
1065 /* Create a new rasterizer state based on the CSO rasterizer state.
1066  *
1067  * This is a very large chunk of state, and covers most of the graphics
1068  * backend (GB), geometry assembly (GA), and setup unit (SU) blocks.
1069  *
1070  * In a not entirely unironic sidenote, this state has nearly nothing to do
1071  * with the actual block on the Radeon called the rasterizer (RS). */
r300_create_rs_state(struct pipe_context * pipe,const struct pipe_rasterizer_state * state)1072 static void* r300_create_rs_state(struct pipe_context* pipe,
1073                                   const struct pipe_rasterizer_state* state)
1074 {
1075     struct r300_rs_state* rs = CALLOC_STRUCT(r300_rs_state);
1076     uint32_t vap_control_status;    /* R300_VAP_CNTL_STATUS: 0x2140 */
1077     uint32_t vap_clip_cntl;         /* R300_VAP_CLIP_CNTL: 0x221C */
1078     uint32_t point_size;            /* R300_GA_POINT_SIZE: 0x421c */
1079     uint32_t point_minmax;          /* R300_GA_POINT_MINMAX: 0x4230 */
1080     uint32_t line_control;          /* R300_GA_LINE_CNTL: 0x4234 */
1081     uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */
1082     uint32_t cull_mode;             /* R300_SU_CULL_MODE: 0x42b8 */
1083     uint32_t line_stipple_config;   /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */
1084     uint32_t line_stipple_value;    /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */
1085     uint32_t polygon_mode;          /* R300_GA_POLY_MODE: 0x4288 */
1086     uint32_t clip_rule;             /* R300_SC_CLIP_RULE: 0x43D0 */
1087     uint32_t round_mode;            /* R300_GA_ROUND_MODE: 0x428c */
1088 
1089     /* Point sprites texture coordinates, 0: lower left, 1: upper right */
1090     float point_texcoord_left = 0;  /* R300_GA_POINT_S0: 0x4200 */
1091     float point_texcoord_bottom = 0;/* R300_GA_POINT_T0: 0x4204 */
1092     float point_texcoord_right = 1; /* R300_GA_POINT_S1: 0x4208 */
1093     float point_texcoord_top = 0;   /* R300_GA_POINT_T1: 0x420c */
1094     boolean vclamp = state->clamp_vertex_color ||
1095                      !r300_context(pipe)->screen->caps.is_r500;
1096     CB_LOCALS;
1097 
1098     /* Copy rasterizer state. */
1099     rs->rs = *state;
1100     rs->rs_draw = *state;
1101 
1102     rs->rs.sprite_coord_enable = state->point_quad_rasterization *
1103                                  state->sprite_coord_enable;
1104 
1105     /* Override some states for Draw. */
1106     rs->rs_draw.sprite_coord_enable = 0; /* We can do this in HW. */
1107     rs->rs_draw.offset_point = 0;
1108     rs->rs_draw.offset_line = 0;
1109     rs->rs_draw.offset_tri = 0;
1110     rs->rs_draw.offset_clamp = 0;
1111 
1112 #ifdef PIPE_ARCH_LITTLE_ENDIAN
1113     vap_control_status = R300_VC_NO_SWAP;
1114 #else
1115     vap_control_status = R300_VC_32BIT_SWAP;
1116 #endif
1117 
1118     /* If no TCL engine is present, turn off the HW TCL. */
1119     if (!r300_screen(pipe->screen)->caps.has_tcl) {
1120         vap_control_status |= R300_VAP_TCL_BYPASS;
1121     }
1122 
1123     /* Point size width and height. */
1124     point_size =
1125         pack_float_16_6x(state->point_size) |
1126         (pack_float_16_6x(state->point_size) << R300_POINTSIZE_X_SHIFT);
1127 
1128     /* Point size clamping. */
1129     if (state->point_size_per_vertex) {
1130         /* Per-vertex point size.
1131          * Clamp to [0, max FB size] */
1132         float min_psiz = util_get_min_point_size(state);
1133         float max_psiz = pipe->screen->get_paramf(pipe->screen,
1134                                         PIPE_CAPF_MAX_POINT_WIDTH);
1135         point_minmax =
1136             (pack_float_16_6x(min_psiz) << R300_GA_POINT_MINMAX_MIN_SHIFT) |
1137             (pack_float_16_6x(max_psiz) << R300_GA_POINT_MINMAX_MAX_SHIFT);
1138     } else {
1139         /* We cannot disable the point-size vertex output,
1140          * so clamp it. */
1141         float psiz = state->point_size;
1142         point_minmax =
1143             (pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MIN_SHIFT) |
1144             (pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MAX_SHIFT);
1145     }
1146 
1147     /* Line control. */
1148     line_control = pack_float_16_6x(state->line_width) |
1149         R300_GA_LINE_CNTL_END_TYPE_COMP;
1150 
1151     /* Enable polygon mode */
1152     polygon_mode = 0;
1153     if (state->fill_front != PIPE_POLYGON_MODE_FILL ||
1154         state->fill_back != PIPE_POLYGON_MODE_FILL) {
1155         polygon_mode = R300_GA_POLY_MODE_DUAL;
1156     }
1157 
1158     /* Front face */
1159     if (state->front_ccw)
1160         cull_mode = R300_FRONT_FACE_CCW;
1161     else
1162         cull_mode = R300_FRONT_FACE_CW;
1163 
1164     /* Polygon offset */
1165     polygon_offset_enable = 0;
1166     if (util_get_offset(state, state->fill_front)) {
1167        polygon_offset_enable |= R300_FRONT_ENABLE;
1168     }
1169     if (util_get_offset(state, state->fill_back)) {
1170        polygon_offset_enable |= R300_BACK_ENABLE;
1171     }
1172 
1173     rs->polygon_offset_enable = polygon_offset_enable != 0;
1174 
1175     /* Polygon mode */
1176     if (polygon_mode) {
1177        polygon_mode |=
1178           r300_translate_polygon_mode_front(state->fill_front);
1179        polygon_mode |=
1180           r300_translate_polygon_mode_back(state->fill_back);
1181     }
1182 
1183     if (state->cull_face & PIPE_FACE_FRONT) {
1184         cull_mode |= R300_CULL_FRONT;
1185     }
1186     if (state->cull_face & PIPE_FACE_BACK) {
1187         cull_mode |= R300_CULL_BACK;
1188     }
1189 
1190     if (state->line_stipple_enable) {
1191         line_stipple_config =
1192             R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE |
1193             (fui((float)state->line_stipple_factor) &
1194                 R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK);
1195         /* XXX this might need to be scaled up */
1196         line_stipple_value = state->line_stipple_pattern;
1197     } else {
1198         line_stipple_config = 0;
1199         line_stipple_value = 0;
1200     }
1201 
1202     if (state->flatshade) {
1203         rs->color_control = R300_SHADE_MODEL_FLAT;
1204     } else {
1205         rs->color_control = R300_SHADE_MODEL_SMOOTH;
1206     }
1207 
1208     clip_rule = state->scissor ? 0xAAAA : 0xFFFF;
1209 
1210     /* Point sprites coord mode */
1211     if (rs->rs.sprite_coord_enable) {
1212         switch (state->sprite_coord_mode) {
1213             case PIPE_SPRITE_COORD_UPPER_LEFT:
1214                 point_texcoord_top = 0.0f;
1215                 point_texcoord_bottom = 1.0f;
1216                 break;
1217             case PIPE_SPRITE_COORD_LOWER_LEFT:
1218                 point_texcoord_top = 1.0f;
1219                 point_texcoord_bottom = 0.0f;
1220                 break;
1221         }
1222     }
1223 
1224     if (r300_screen(pipe->screen)->caps.has_tcl) {
1225        vap_clip_cntl = (state->clip_plane_enable & 63) |
1226                        R300_PS_UCP_MODE_CLIP_AS_TRIFAN;
1227     } else {
1228        vap_clip_cntl = R300_CLIP_DISABLE;
1229     }
1230 
1231     /* Vertex color clamping. FP20 means no clamping. */
1232     round_mode =
1233       R300_GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST |
1234       (!vclamp ? (R300_GA_ROUND_MODE_RGB_CLAMP_FP20 |
1235                   R300_GA_ROUND_MODE_ALPHA_CLAMP_FP20) : 0);
1236 
1237     /* Build the main command buffer. */
1238     BEGIN_CB(rs->cb_main, RS_STATE_MAIN_SIZE);
1239     OUT_CB_REG(R300_VAP_CNTL_STATUS, vap_control_status);
1240     OUT_CB_REG(R300_VAP_CLIP_CNTL, vap_clip_cntl);
1241     OUT_CB_REG(R300_GA_POINT_SIZE, point_size);
1242     OUT_CB_REG_SEQ(R300_GA_POINT_MINMAX, 2);
1243     OUT_CB(point_minmax);
1244     OUT_CB(line_control);
1245     OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_ENABLE, 2);
1246     OUT_CB(polygon_offset_enable);
1247     rs->cull_mode_index = 11;
1248     OUT_CB(cull_mode);
1249     OUT_CB_REG(R300_GA_LINE_STIPPLE_CONFIG, line_stipple_config);
1250     OUT_CB_REG(R300_GA_LINE_STIPPLE_VALUE, line_stipple_value);
1251     OUT_CB_REG(R300_GA_POLY_MODE, polygon_mode);
1252     OUT_CB_REG(R300_GA_ROUND_MODE, round_mode);
1253     OUT_CB_REG(R300_SC_CLIP_RULE, clip_rule);
1254     OUT_CB_REG_SEQ(R300_GA_POINT_S0, 4);
1255     OUT_CB_32F(point_texcoord_left);
1256     OUT_CB_32F(point_texcoord_bottom);
1257     OUT_CB_32F(point_texcoord_right);
1258     OUT_CB_32F(point_texcoord_top);
1259     END_CB;
1260 
1261     /* Build the two command buffers for polygon offset setup. */
1262     if (polygon_offset_enable) {
1263         float scale = state->offset_scale * 12;
1264         float offset = state->offset_units * 4;
1265 
1266         BEGIN_CB(rs->cb_poly_offset_zb16, 5);
1267         OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
1268         OUT_CB_32F(scale);
1269         OUT_CB_32F(offset);
1270         OUT_CB_32F(scale);
1271         OUT_CB_32F(offset);
1272         END_CB;
1273 
1274         offset = state->offset_units * 2;
1275 
1276         BEGIN_CB(rs->cb_poly_offset_zb24, 5);
1277         OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
1278         OUT_CB_32F(scale);
1279         OUT_CB_32F(offset);
1280         OUT_CB_32F(scale);
1281         OUT_CB_32F(offset);
1282         END_CB;
1283     }
1284 
1285     return (void*)rs;
1286 }
1287 
1288 /* Bind rasterizer state. */
r300_bind_rs_state(struct pipe_context * pipe,void * state)1289 static void r300_bind_rs_state(struct pipe_context* pipe, void* state)
1290 {
1291     struct r300_context* r300 = r300_context(pipe);
1292     struct r300_rs_state* rs = (struct r300_rs_state*)state;
1293     int last_sprite_coord_enable = r300->sprite_coord_enable;
1294     boolean last_two_sided_color = r300->two_sided_color;
1295 
1296     if (r300->draw && rs) {
1297         draw_set_rasterizer_state(r300->draw, &rs->rs_draw, state);
1298     }
1299 
1300     if (rs) {
1301         r300->polygon_offset_enabled = rs->polygon_offset_enable;
1302         r300->sprite_coord_enable = rs->rs.sprite_coord_enable;
1303         r300->two_sided_color = rs->rs.light_twoside;
1304     } else {
1305         r300->polygon_offset_enabled = FALSE;
1306         r300->sprite_coord_enable = 0;
1307         r300->two_sided_color = FALSE;
1308     }
1309 
1310     UPDATE_STATE(state, r300->rs_state);
1311     r300->rs_state.size = RS_STATE_MAIN_SIZE + (r300->polygon_offset_enabled ? 5 : 0);
1312 
1313     if (last_sprite_coord_enable != r300->sprite_coord_enable ||
1314         last_two_sided_color != r300->two_sided_color) {
1315         r300_mark_atom_dirty(r300, &r300->rs_block_state);
1316     }
1317 }
1318 
/* Free rasterizer state. Only the state object itself is released. */
static void r300_delete_rs_state(struct pipe_context* pipe, void* state)
{
    (void)pipe; /* the context is not needed to release the object */

    FREE(state);
}
1324 
1325 static void*
r300_create_sampler_state(struct pipe_context * pipe,const struct pipe_sampler_state * state)1326         r300_create_sampler_state(struct pipe_context* pipe,
1327                                   const struct pipe_sampler_state* state)
1328 {
1329     struct r300_context* r300 = r300_context(pipe);
1330     struct r300_sampler_state* sampler = CALLOC_STRUCT(r300_sampler_state);
1331     boolean is_r500 = r300->screen->caps.is_r500;
1332     int lod_bias;
1333 
1334     sampler->state = *state;
1335 
1336     /* r300 doesn't handle CLAMP and MIRROR_CLAMP correctly when either MAG
1337      * or MIN filter is NEAREST. Since texwrap produces same results
1338      * for CLAMP and CLAMP_TO_EDGE, we use them instead. */
1339     if (sampler->state.min_img_filter == PIPE_TEX_FILTER_NEAREST ||
1340         sampler->state.mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
1341         /* Wrap S. */
1342         if (sampler->state.wrap_s == PIPE_TEX_WRAP_CLAMP)
1343             sampler->state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
1344         else if (sampler->state.wrap_s == PIPE_TEX_WRAP_MIRROR_CLAMP)
1345             sampler->state.wrap_s = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;
1346 
1347         /* Wrap T. */
1348         if (sampler->state.wrap_t == PIPE_TEX_WRAP_CLAMP)
1349             sampler->state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
1350         else if (sampler->state.wrap_t == PIPE_TEX_WRAP_MIRROR_CLAMP)
1351             sampler->state.wrap_t = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;
1352 
1353         /* Wrap R. */
1354         if (sampler->state.wrap_r == PIPE_TEX_WRAP_CLAMP)
1355             sampler->state.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
1356         else if (sampler->state.wrap_r == PIPE_TEX_WRAP_MIRROR_CLAMP)
1357             sampler->state.wrap_r = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;
1358     }
1359 
1360     sampler->filter0 |=
1361         (r300_translate_wrap(sampler->state.wrap_s) << R300_TX_WRAP_S_SHIFT) |
1362         (r300_translate_wrap(sampler->state.wrap_t) << R300_TX_WRAP_T_SHIFT) |
1363         (r300_translate_wrap(sampler->state.wrap_r) << R300_TX_WRAP_R_SHIFT);
1364 
1365     sampler->filter0 |= r300_translate_tex_filters(state->min_img_filter,
1366                                                    state->mag_img_filter,
1367                                                    state->min_mip_filter,
1368                                                    state->max_anisotropy > 1);
1369 
1370     sampler->filter0 |= r300_anisotropy(state->max_anisotropy);
1371 
1372     /* Unfortunately, r300-r500 don't support floating-point mipmap lods. */
1373     /* We must pass these to the merge function to clamp them properly. */
1374     sampler->min_lod = (unsigned)MAX2(state->min_lod, 0);
1375     sampler->max_lod = (unsigned)MAX2(ceilf(state->max_lod), 0);
1376 
1377     lod_bias = CLAMP((int)(state->lod_bias * 32 + 1), -(1 << 9), (1 << 9) - 1);
1378 
1379     sampler->filter1 |= (lod_bias << R300_LOD_BIAS_SHIFT) & R300_LOD_BIAS_MASK;
1380 
1381     /* This is very high quality anisotropic filtering for R5xx.
1382      * It's good for benchmarking the performance of texturing but
1383      * in practice we don't want to slow down the driver because it's
1384      * a pretty good performance killer. Feel free to play with it. */
1385     if (DBG_ON(r300, DBG_ANISOHQ) && is_r500) {
1386         sampler->filter1 |= r500_anisotropy(state->max_anisotropy);
1387     }
1388 
1389     /* R500-specific fixups and optimizations */
1390     if (r300->screen->caps.is_r500) {
1391         sampler->filter1 |= R500_BORDER_FIX;
1392     }
1393 
1394     return (void*)sampler;
1395 }
1396 
r300_bind_sampler_states(struct pipe_context * pipe,unsigned count,void ** states)1397 static void r300_bind_sampler_states(struct pipe_context* pipe,
1398                                      unsigned count,
1399                                      void** states)
1400 {
1401     struct r300_context* r300 = r300_context(pipe);
1402     struct r300_textures_state* state =
1403         (struct r300_textures_state*)r300->textures_state.state;
1404     unsigned tex_units = r300->screen->caps.num_tex_units;
1405 
1406     if (count > tex_units) {
1407         return;
1408     }
1409 
1410     memcpy(state->sampler_states, states, sizeof(void*) * count);
1411     state->sampler_state_count = count;
1412 
1413     r300_mark_atom_dirty(r300, &r300->textures_state);
1414 }
1415 
/* Intentionally empty hook with a (pipe, count, states) signature; going by
 * its name it stands in for vertex-texture binding, which is not
 * implemented. All arguments are ignored. */
static void r300_lacks_vertex_textures(struct pipe_context* pipe,
                                       unsigned count,
                                       void** states)
{
    (void)pipe;
    (void)count;
    (void)states;
}
1421 
/* Free sampler state. Only the state object itself is released. */
static void r300_delete_sampler_state(struct pipe_context* pipe, void* state)
{
    (void)pipe; /* the context is not needed to release the object */

    FREE(state);
}
1426 
r300_assign_texture_cache_region(unsigned index,unsigned num)1427 static uint32_t r300_assign_texture_cache_region(unsigned index, unsigned num)
1428 {
1429     /* This looks like a hack, but I believe it's suppose to work like
1430      * that. To illustrate how this works, let's assume you have 5 textures.
1431      * From docs, 5 and the successive numbers are:
1432      *
1433      * FOURTH_1     = 5
1434      * FOURTH_2     = 6
1435      * FOURTH_3     = 7
1436      * EIGHTH_0     = 8
1437      * EIGHTH_1     = 9
1438      *
1439      * First 3 textures will get 3/4 of size of the cache, divived evenly
1440      * between them. The last 1/4 of the cache must be divided between
1441      * the last 2 textures, each will therefore get 1/8 of the cache.
1442      * Why not just to use "5 + texture_index" ?
1443      *
1444      * This simple trick works for all "num" <= 16.
1445      */
1446     if (num <= 1)
1447         return R300_TX_CACHE(R300_TX_CACHE_WHOLE);
1448     else
1449         return R300_TX_CACHE(num + index);
1450 }
1451 
r300_set_fragment_sampler_views(struct pipe_context * pipe,unsigned count,struct pipe_sampler_view ** views)1452 static void r300_set_fragment_sampler_views(struct pipe_context* pipe,
1453                                             unsigned count,
1454                                             struct pipe_sampler_view** views)
1455 {
1456     struct r300_context* r300 = r300_context(pipe);
1457     struct r300_textures_state* state =
1458         (struct r300_textures_state*)r300->textures_state.state;
1459     struct r300_resource *texture;
1460     unsigned i, real_num_views = 0, view_index = 0;
1461     unsigned tex_units = r300->screen->caps.num_tex_units;
1462     boolean dirty_tex = FALSE;
1463 
1464     if (count > tex_units) {
1465         return;
1466     }
1467 
1468     /* Calculate the real number of views. */
1469     for (i = 0; i < count; i++) {
1470         if (views[i])
1471             real_num_views++;
1472     }
1473 
1474     for (i = 0; i < count; i++) {
1475         pipe_sampler_view_reference(
1476                 (struct pipe_sampler_view**)&state->sampler_views[i],
1477                 views[i]);
1478 
1479         if (!views[i]) {
1480             continue;
1481         }
1482 
1483         /* A new sampler view (= texture)... */
1484         dirty_tex = TRUE;
1485 
1486         /* Set the texrect factor in the fragment shader.
1487              * Needed for RECT and NPOT fallback. */
1488         texture = r300_resource(views[i]->texture);
1489         if (texture->tex.is_npot) {
1490             r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state);
1491         }
1492 
1493         state->sampler_views[i]->texcache_region =
1494                 r300_assign_texture_cache_region(view_index, real_num_views);
1495         view_index++;
1496     }
1497 
1498     for (i = count; i < tex_units; i++) {
1499         if (state->sampler_views[i]) {
1500             pipe_sampler_view_reference(
1501                     (struct pipe_sampler_view**)&state->sampler_views[i],
1502                     NULL);
1503         }
1504     }
1505 
1506     state->sampler_view_count = count;
1507 
1508     r300_mark_atom_dirty(r300, &r300->textures_state);
1509 
1510     if (dirty_tex) {
1511         r300_mark_atom_dirty(r300, &r300->texture_cache_inval);
1512     }
1513 }
1514 
1515 struct pipe_sampler_view *
r300_create_sampler_view_custom(struct pipe_context * pipe,struct pipe_resource * texture,const struct pipe_sampler_view * templ,unsigned width0_override,unsigned height0_override)1516 r300_create_sampler_view_custom(struct pipe_context *pipe,
1517                          struct pipe_resource *texture,
1518                          const struct pipe_sampler_view *templ,
1519                          unsigned width0_override,
1520                          unsigned height0_override)
1521 {
1522     struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view);
1523     struct r300_resource *tex = r300_resource(texture);
1524     boolean is_r500 = r300_screen(pipe->screen)->caps.is_r500;
1525     boolean dxtc_swizzle = r300_screen(pipe->screen)->caps.dxtc_swizzle;
1526 
1527     if (view) {
1528         unsigned hwformat;
1529 
1530         view->base = *templ;
1531         view->base.reference.count = 1;
1532         view->base.context = pipe;
1533         view->base.texture = NULL;
1534         pipe_resource_reference(&view->base.texture, texture);
1535 
1536 	view->width0_override = width0_override;
1537 	view->height0_override = height0_override;
1538         view->swizzle[0] = templ->swizzle_r;
1539         view->swizzle[1] = templ->swizzle_g;
1540         view->swizzle[2] = templ->swizzle_b;
1541         view->swizzle[3] = templ->swizzle_a;
1542 
1543         hwformat = r300_translate_texformat(templ->format,
1544                                             view->swizzle,
1545                                             is_r500,
1546                                             dxtc_swizzle);
1547 
1548         if (hwformat == ~0) {
1549             fprintf(stderr, "r300: Ooops. Got unsupported format %s in %s.\n",
1550                     util_format_short_name(templ->format), __func__);
1551         }
1552         assert(hwformat != ~0);
1553 
1554 	r300_texture_setup_format_state(r300_screen(pipe->screen), tex,
1555 					templ->format, 0,
1556 	                                width0_override, height0_override,
1557 					&view->format);
1558         view->format.format1 |= hwformat;
1559         if (is_r500) {
1560             view->format.format2 |= r500_tx_format_msb_bit(templ->format);
1561         }
1562     }
1563 
1564     return (struct pipe_sampler_view*)view;
1565 }
1566 
1567 static struct pipe_sampler_view *
r300_create_sampler_view(struct pipe_context * pipe,struct pipe_resource * texture,const struct pipe_sampler_view * templ)1568 r300_create_sampler_view(struct pipe_context *pipe,
1569                          struct pipe_resource *texture,
1570                          const struct pipe_sampler_view *templ)
1571 {
1572     return r300_create_sampler_view_custom(pipe, texture, templ,
1573                                            r300_resource(texture)->tex.width0,
1574                                            r300_resource(texture)->tex.height0);
1575 }
1576 
1577 
1578 static void
r300_sampler_view_destroy(struct pipe_context * pipe,struct pipe_sampler_view * view)1579 r300_sampler_view_destroy(struct pipe_context *pipe,
1580                           struct pipe_sampler_view *view)
1581 {
1582    pipe_resource_reference(&view->texture, NULL);
1583    FREE(view);
1584 }
1585 
r300_set_scissor_state(struct pipe_context * pipe,const struct pipe_scissor_state * state)1586 static void r300_set_scissor_state(struct pipe_context* pipe,
1587                                    const struct pipe_scissor_state* state)
1588 {
1589     struct r300_context* r300 = r300_context(pipe);
1590 
1591     memcpy(r300->scissor_state.state, state,
1592         sizeof(struct pipe_scissor_state));
1593 
1594     r300_mark_atom_dirty(r300, &r300->scissor_state);
1595 }
1596 
r300_set_viewport_state(struct pipe_context * pipe,const struct pipe_viewport_state * state)1597 static void r300_set_viewport_state(struct pipe_context* pipe,
1598                                     const struct pipe_viewport_state* state)
1599 {
1600     struct r300_context* r300 = r300_context(pipe);
1601     struct r300_viewport_state* viewport =
1602         (struct r300_viewport_state*)r300->viewport_state.state;
1603 
1604     r300->viewport = *state;
1605 
1606     if (r300->draw) {
1607         draw_set_viewport_state(r300->draw, state);
1608         viewport->vte_control = R300_VTX_XY_FMT | R300_VTX_Z_FMT;
1609         return;
1610     }
1611 
1612     /* Do the transform in HW. */
1613     viewport->vte_control = R300_VTX_W0_FMT;
1614 
1615     if (state->scale[0] != 1.0f) {
1616         viewport->xscale = state->scale[0];
1617         viewport->vte_control |= R300_VPORT_X_SCALE_ENA;
1618     }
1619     if (state->scale[1] != 1.0f) {
1620         viewport->yscale = state->scale[1];
1621         viewport->vte_control |= R300_VPORT_Y_SCALE_ENA;
1622     }
1623     if (state->scale[2] != 1.0f) {
1624         viewport->zscale = state->scale[2];
1625         viewport->vte_control |= R300_VPORT_Z_SCALE_ENA;
1626     }
1627     if (state->translate[0] != 0.0f) {
1628         viewport->xoffset = state->translate[0];
1629         viewport->vte_control |= R300_VPORT_X_OFFSET_ENA;
1630     }
1631     if (state->translate[1] != 0.0f) {
1632         viewport->yoffset = state->translate[1];
1633         viewport->vte_control |= R300_VPORT_Y_OFFSET_ENA;
1634     }
1635     if (state->translate[2] != 0.0f) {
1636         viewport->zoffset = state->translate[2];
1637         viewport->vte_control |= R300_VPORT_Z_OFFSET_ENA;
1638     }
1639 
1640     r300_mark_atom_dirty(r300, &r300->viewport_state);
1641     if (r300->fs.state && r300_fs(r300)->shader &&
1642         r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED) {
1643         r300_mark_atom_dirty(r300, &r300->fs_rc_constant_state);
1644     }
1645 }
1646 
r300_set_vertex_buffers_hwtcl(struct pipe_context * pipe,unsigned count,const struct pipe_vertex_buffer * buffers)1647 static void r300_set_vertex_buffers_hwtcl(struct pipe_context* pipe,
1648                                     unsigned count,
1649                                     const struct pipe_vertex_buffer* buffers)
1650 {
1651     struct r300_context* r300 = r300_context(pipe);
1652 
1653     /* There must be at least one vertex buffer set, otherwise it locks up. */
1654     if (!count) {
1655         buffers = &r300->dummy_vb;
1656         count = 1;
1657     }
1658 
1659     util_copy_vertex_buffers(r300->vertex_buffer,
1660                              &r300->nr_vertex_buffers,
1661                              buffers, count);
1662 
1663     r300->vertex_arrays_dirty = TRUE;
1664 }
1665 
r300_set_vertex_buffers_swtcl(struct pipe_context * pipe,unsigned count,const struct pipe_vertex_buffer * buffers)1666 static void r300_set_vertex_buffers_swtcl(struct pipe_context* pipe,
1667                                     unsigned count,
1668                                     const struct pipe_vertex_buffer* buffers)
1669 {
1670     struct r300_context* r300 = r300_context(pipe);
1671     unsigned i;
1672 
1673     util_copy_vertex_buffers(r300->vertex_buffer,
1674                              &r300->nr_vertex_buffers,
1675                              buffers, count);
1676     draw_set_vertex_buffers(r300->draw, count, buffers);
1677 
1678     for (i = 0; i < count; i++) {
1679         if (buffers[i].user_buffer) {
1680             draw_set_mapped_vertex_buffer(r300->draw, i,
1681                                           buffers[i].user_buffer);
1682         } else if (buffers[i].buffer) {
1683             draw_set_mapped_vertex_buffer(r300->draw, i,
1684                 r300_resource(buffers[i].buffer)->malloced_buffer);
1685         }
1686     }
1687 }
1688 
r300_set_index_buffer_hwtcl(struct pipe_context * pipe,const struct pipe_index_buffer * ib)1689 static void r300_set_index_buffer_hwtcl(struct pipe_context* pipe,
1690                                         const struct pipe_index_buffer *ib)
1691 {
1692     struct r300_context* r300 = r300_context(pipe);
1693 
1694     if (ib) {
1695         pipe_resource_reference(&r300->index_buffer.buffer, ib->buffer);
1696         memcpy(&r300->index_buffer, ib, sizeof(*ib));
1697     } else {
1698         pipe_resource_reference(&r300->index_buffer.buffer, NULL);
1699     }
1700 }
1701 
r300_set_index_buffer_swtcl(struct pipe_context * pipe,const struct pipe_index_buffer * ib)1702 static void r300_set_index_buffer_swtcl(struct pipe_context* pipe,
1703                                         const struct pipe_index_buffer *ib)
1704 {
1705     struct r300_context* r300 = r300_context(pipe);
1706 
1707     if (ib) {
1708         const void *buf = NULL;
1709         if (ib->user_buffer) {
1710             buf = ib->user_buffer;
1711         } else if (ib->buffer) {
1712             buf = r300_resource(ib->buffer)->malloced_buffer;
1713         }
1714         draw_set_indexes(r300->draw,
1715                          (const ubyte *) buf + ib->offset,
1716                          ib->index_size);
1717     }
1718 }
1719 
1720 /* Initialize the PSC tables. */
r300_vertex_psc(struct r300_vertex_element_state * velems)1721 static void r300_vertex_psc(struct r300_vertex_element_state *velems)
1722 {
1723     struct r300_vertex_stream_state *vstream = &velems->vertex_stream;
1724     uint16_t type, swizzle;
1725     enum pipe_format format;
1726     unsigned i;
1727 
1728     /* Vertex shaders have no semantics on their inputs,
1729      * so PSC should just route stuff based on the vertex elements,
1730      * and not on attrib information. */
1731     for (i = 0; i < velems->count; i++) {
1732         format = velems->velem[i].src_format;
1733 
1734         type = r300_translate_vertex_data_type(format);
1735         if (type == R300_INVALID_FORMAT) {
1736             fprintf(stderr, "r300: Bad vertex format %s.\n",
1737                     util_format_short_name(format));
1738             assert(0);
1739             abort();
1740         }
1741 
1742         type |= i << R300_DST_VEC_LOC_SHIFT;
1743         swizzle = r300_translate_vertex_data_swizzle(format);
1744 
1745         if (i & 1) {
1746             vstream->vap_prog_stream_cntl[i >> 1] |= type << 16;
1747             vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
1748         } else {
1749             vstream->vap_prog_stream_cntl[i >> 1] |= type;
1750             vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
1751         }
1752     }
1753 
1754     /* Set the last vector in the PSC. */
1755     if (i) {
1756         i -= 1;
1757     }
1758     vstream->vap_prog_stream_cntl[i >> 1] |=
1759         (R300_LAST_VEC << (i & 1 ? 16 : 0));
1760 
1761     vstream->count = (i >> 1) + 1;
1762 }
1763 
r300_create_vertex_elements_state(struct pipe_context * pipe,unsigned count,const struct pipe_vertex_element * attribs)1764 static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
1765                                                unsigned count,
1766                                                const struct pipe_vertex_element* attribs)
1767 {
1768     struct r300_vertex_element_state *velems;
1769     unsigned i;
1770     struct pipe_vertex_element dummy_attrib = {0};
1771 
1772     /* R300 Programmable Stream Control (PSC) doesn't support 0 vertex elements. */
1773     if (!count) {
1774         dummy_attrib.src_format = PIPE_FORMAT_R8G8B8A8_UNORM;
1775         attribs = &dummy_attrib;
1776         count = 1;
1777     } else if (count > 16) {
1778         fprintf(stderr, "r300: More than 16 vertex elements are not supported,"
1779                 " requested %i, using 16.\n", count);
1780         count = 16;
1781     }
1782 
1783     velems = CALLOC_STRUCT(r300_vertex_element_state);
1784     if (!velems)
1785         return NULL;
1786 
1787     velems->count = count;
1788     memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count);
1789 
1790     if (r300_screen(pipe->screen)->caps.has_tcl) {
1791         /* Setup PSC.
1792          * The unused components will be replaced by (..., 0, 1). */
1793         r300_vertex_psc(velems);
1794 
1795         for (i = 0; i < count; i++) {
1796             velems->format_size[i] =
1797                 align(util_format_get_blocksize(velems->velem[i].src_format), 4);
1798             velems->vertex_size_dwords += velems->format_size[i] / 4;
1799         }
1800     }
1801 
1802     return velems;
1803 }
1804 
r300_bind_vertex_elements_state(struct pipe_context * pipe,void * state)1805 static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
1806                                             void *state)
1807 {
1808     struct r300_context *r300 = r300_context(pipe);
1809     struct r300_vertex_element_state *velems = state;
1810 
1811     if (velems == NULL) {
1812         return;
1813     }
1814 
1815     r300->velems = velems;
1816 
1817     if (r300->draw) {
1818         draw_set_vertex_elements(r300->draw, velems->count, velems->velem);
1819         return;
1820     }
1821 
1822     UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state);
1823     r300->vertex_stream_state.size = (1 + velems->vertex_stream.count) * 2;
1824     r300->vertex_arrays_dirty = TRUE;
1825 }
1826 
/* Free a vertex elements state object. "pipe" is not used. */
static void r300_delete_vertex_elements_state(struct pipe_context *pipe,
                                              void *state)
{
    FREE(state);
}
1831 
r300_create_vs_state(struct pipe_context * pipe,const struct pipe_shader_state * shader)1832 static void* r300_create_vs_state(struct pipe_context* pipe,
1833                                   const struct pipe_shader_state* shader)
1834 {
1835     struct r300_context* r300 = r300_context(pipe);
1836     struct r300_vertex_shader* vs = CALLOC_STRUCT(r300_vertex_shader);
1837 
1838     /* Copy state directly into shader. */
1839     vs->state = *shader;
1840     vs->state.tokens = tgsi_dup_tokens(shader->tokens);
1841 
1842     if (r300->screen->caps.has_tcl) {
1843         r300_init_vs_outputs(r300, vs);
1844         r300_translate_vertex_shader(r300, vs);
1845     } else {
1846         r300_draw_init_vertex_shader(r300, vs);
1847     }
1848 
1849     return vs;
1850 }
1851 
r300_bind_vs_state(struct pipe_context * pipe,void * shader)1852 static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
1853 {
1854     struct r300_context* r300 = r300_context(pipe);
1855     struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader;
1856 
1857     if (vs == NULL) {
1858         r300->vs_state.state = NULL;
1859         return;
1860     }
1861     if (vs == r300->vs_state.state) {
1862         return;
1863     }
1864     r300->vs_state.state = vs;
1865 
1866     /* The majority of the RS block bits is dependent on the vertex shader. */
1867     r300_mark_atom_dirty(r300, &r300->rs_block_state); /* Will be updated before the emission. */
1868 
1869     if (r300->screen->caps.has_tcl) {
1870         unsigned fc_op_dwords = r300->screen->caps.is_r500 ? 3 : 2;
1871         r300_mark_atom_dirty(r300, &r300->vs_state);
1872         r300->vs_state.size = vs->code.length + 9 +
1873 			(R300_VS_MAX_FC_OPS * fc_op_dwords + 4);
1874 
1875         r300_mark_atom_dirty(r300, &r300->vs_constants);
1876         r300->vs_constants.size =
1877                 2 +
1878                 (vs->externals_count ? vs->externals_count * 4 + 3 : 0) +
1879                 (vs->immediates_count ? vs->immediates_count * 4 + 3 : 0);
1880 
1881         ((struct r300_constant_buffer*)r300->vs_constants.state)->remap_table =
1882                 vs->code.constants_remap_table;
1883 
1884         r300_mark_atom_dirty(r300, &r300->pvs_flush);
1885     } else {
1886         draw_bind_vertex_shader(r300->draw,
1887                 (struct draw_vertex_shader*)vs->draw_vs);
1888     }
1889 }
1890 
r300_delete_vs_state(struct pipe_context * pipe,void * shader)1891 static void r300_delete_vs_state(struct pipe_context* pipe, void* shader)
1892 {
1893     struct r300_context* r300 = r300_context(pipe);
1894     struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader;
1895 
1896     if (r300->screen->caps.has_tcl) {
1897         rc_constants_destroy(&vs->code.constants);
1898         if (vs->code.constants_remap_table)
1899             FREE(vs->code.constants_remap_table);
1900     } else {
1901         draw_delete_vertex_shader(r300->draw,
1902                 (struct draw_vertex_shader*)vs->draw_vs);
1903     }
1904 
1905     FREE((void*)vs->state.tokens);
1906     FREE(shader);
1907 }
1908 
/* Set the constant buffer for a shader stage.
 *
 * Only PIPE_SHADER_VERTEX and PIPE_SHADER_FRAGMENT are handled; any other
 * stage, or a NULL "cb", is ignored. The constants are read through a CPU
 * pointer: cb->user_buffer if set, otherwise the malloced_buffer of the
 * bound resource. If neither is available, the call does nothing.
 *
 * - Fragment: the pointer is stored and the fs_constants atom is marked
 *   dirty.
 * - Vertex without TCL: the pointer is passed to the draw module, if one
 *   is present.
 * - Vertex with TCL: the pointer is stored and, when a vertex shader is
 *   bound, the buffer is given a base offset and the vs_constants atom is
 *   marked dirty. */
static void r300_set_constant_buffer(struct pipe_context *pipe,
                                     uint shader, uint index,
                                     struct pipe_constant_buffer *cb)
{
    struct r300_context *r300 = r300_context(pipe);
    struct r300_constant_buffer *cbuf;
    struct r300_vertex_shader *vs;
    uint32_t *mapped;

    if (!cb) {
        return;
    }

    if (shader == PIPE_SHADER_VERTEX) {
        cbuf = (struct r300_constant_buffer*)r300->vs_constants.state;
    } else if (shader == PIPE_SHADER_FRAGMENT) {
        cbuf = (struct r300_constant_buffer*)r300->fs_constants.state;
    } else {
        return;
    }

    /* Find a CPU-visible pointer to the constants. */
    mapped = (uint32_t*)cb->user_buffer;
    if (!mapped) {
        struct r300_resource *rbuf = r300_resource(cb->buffer);

        if (!rbuf || !rbuf->malloced_buffer) {
            return;
        }
        mapped = (uint32_t*)rbuf->malloced_buffer;
    }

    if (shader == PIPE_SHADER_FRAGMENT) {
        cbuf->ptr = mapped;
        r300_mark_atom_dirty(r300, &r300->fs_constants);
        return;
    }

    /* From here on, this is the vertex stage. */
    if (!r300->screen->caps.has_tcl) {
        if (r300->draw) {
            draw_set_mapped_constant_buffer(r300->draw, PIPE_SHADER_VERTEX,
                0, mapped, cb->buffer_size);
        }
        return;
    }

    cbuf->ptr = mapped;

    vs = (struct r300_vertex_shader*)r300->vs_state.state;
    if (!vs) {
        cbuf->buffer_base = 0;
        return;
    }

    /* Place this buffer at the current base and advance the base by the
     * bound shader's constant count. If that runs past
     * R500_MAX_PVS_CONST_VECS, restart from 0 and request a PVS flush. */
    cbuf->buffer_base = r300->vs_const_base;
    r300->vs_const_base += vs->code.constants.Count;
    if (r300->vs_const_base > R500_MAX_PVS_CONST_VECS) {
        r300->vs_const_base = vs->code.constants.Count;
        cbuf->buffer_base = 0;
        r300_mark_atom_dirty(r300, &r300->pvs_flush);
    }
    r300_mark_atom_dirty(r300, &r300->vs_constants);
}
1974 
r300_texture_barrier(struct pipe_context * pipe)1975 static void r300_texture_barrier(struct pipe_context *pipe)
1976 {
1977     struct r300_context *r300 = r300_context(pipe);
1978 
1979     r300_mark_atom_dirty(r300, &r300->gpu_flush);
1980     r300_mark_atom_dirty(r300, &r300->texture_cache_inval);
1981 }
1982 
r300_init_state_functions(struct r300_context * r300)1983 void r300_init_state_functions(struct r300_context* r300)
1984 {
1985     r300->context.create_blend_state = r300_create_blend_state;
1986     r300->context.bind_blend_state = r300_bind_blend_state;
1987     r300->context.delete_blend_state = r300_delete_blend_state;
1988 
1989     r300->context.set_blend_color = r300_set_blend_color;
1990 
1991     r300->context.set_clip_state = r300_set_clip_state;
1992     r300->context.set_sample_mask = r300_set_sample_mask;
1993 
1994     r300->context.set_constant_buffer = r300_set_constant_buffer;
1995 
1996     r300->context.create_depth_stencil_alpha_state = r300_create_dsa_state;
1997     r300->context.bind_depth_stencil_alpha_state = r300_bind_dsa_state;
1998     r300->context.delete_depth_stencil_alpha_state = r300_delete_dsa_state;
1999 
2000     r300->context.set_stencil_ref = r300_set_stencil_ref;
2001 
2002     r300->context.set_framebuffer_state = r300_set_framebuffer_state;
2003 
2004     r300->context.create_fs_state = r300_create_fs_state;
2005     r300->context.bind_fs_state = r300_bind_fs_state;
2006     r300->context.delete_fs_state = r300_delete_fs_state;
2007 
2008     r300->context.set_polygon_stipple = r300_set_polygon_stipple;
2009 
2010     r300->context.create_rasterizer_state = r300_create_rs_state;
2011     r300->context.bind_rasterizer_state = r300_bind_rs_state;
2012     r300->context.delete_rasterizer_state = r300_delete_rs_state;
2013 
2014     r300->context.create_sampler_state = r300_create_sampler_state;
2015     r300->context.bind_fragment_sampler_states = r300_bind_sampler_states;
2016     r300->context.bind_vertex_sampler_states = r300_lacks_vertex_textures;
2017     r300->context.delete_sampler_state = r300_delete_sampler_state;
2018 
2019     r300->context.set_fragment_sampler_views = r300_set_fragment_sampler_views;
2020     r300->context.create_sampler_view = r300_create_sampler_view;
2021     r300->context.sampler_view_destroy = r300_sampler_view_destroy;
2022 
2023     r300->context.set_scissor_state = r300_set_scissor_state;
2024 
2025     r300->context.set_viewport_state = r300_set_viewport_state;
2026 
2027     if (r300->screen->caps.has_tcl) {
2028         r300->context.set_vertex_buffers = r300_set_vertex_buffers_hwtcl;
2029         r300->context.set_index_buffer = r300_set_index_buffer_hwtcl;
2030     } else {
2031         r300->context.set_vertex_buffers = r300_set_vertex_buffers_swtcl;
2032         r300->context.set_index_buffer = r300_set_index_buffer_swtcl;
2033     }
2034 
2035     r300->context.create_vertex_elements_state = r300_create_vertex_elements_state;
2036     r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state;
2037     r300->context.delete_vertex_elements_state = r300_delete_vertex_elements_state;
2038 
2039     r300->context.create_vs_state = r300_create_vs_state;
2040     r300->context.bind_vs_state = r300_bind_vs_state;
2041     r300->context.delete_vs_state = r300_delete_vs_state;
2042 
2043     r300->context.texture_barrier = r300_texture_barrier;
2044 }
2045