1 /**************************************************************************
2  *
3  * Copyright 2007 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28  /*
29   * Authors:
30   *   Keith Whitwell <keithw@vmware.com>
31   */
32 
33 #include "util/u_memory.h"
34 #include "util/u_math.h"
35 #include "draw/draw_context.h"
36 #include "draw/draw_private.h"
37 #include "draw/draw_vbuf.h"
38 #include "draw/draw_vertex.h"
39 #include "draw/draw_vs.h"
40 #include "translate/translate.h"
41 
/* A first pass at incorporating vertex fetch/emit functionality into
 * the vertex shader variant.
 */
44 struct draw_vs_variant_generic {
45    struct draw_vs_variant base;
46 
47    struct draw_vertex_shader *shader;
48    struct draw_context *draw;
49 
50    /* Basic plan is to run these two translate functions before/after
51     * the vertex shader's existing run_linear() routine to simulate
52     * the inclusion of this functionality into the shader...
53     *
54     * Next will look at actually including it.
55     */
56    struct translate *fetch;
57    struct translate *emit;
58 
59    unsigned temp_vertex_stride;
60 };
61 
62 
63 
64 
65 
vsvg_set_buffer(struct draw_vs_variant * variant,unsigned buffer,const void * ptr,unsigned stride,unsigned max_index)66 static void vsvg_set_buffer( struct draw_vs_variant *variant,
67                              unsigned buffer,
68                              const void *ptr,
69                              unsigned stride,
70                              unsigned max_index )
71 {
72    struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant;
73 
74    vsvg->fetch->set_buffer(vsvg->fetch,
75                            buffer,
76                            ptr,
77                            stride,
78                            max_index );
79 }
80 
81 static const struct pipe_viewport_state *
find_viewport(struct draw_context * draw,char * buffer,unsigned vertex_idx,unsigned stride)82 find_viewport(struct draw_context *draw,
83               char *buffer,
84               unsigned vertex_idx,
85               unsigned stride)
86 {
87    int viewport_index_output =
88       draw_current_shader_viewport_index_output(draw);
89    char *ptr = buffer + vertex_idx * stride;
90    unsigned *data = (unsigned *)ptr;
91    int viewport_index =
92       draw_current_shader_uses_viewport_index(draw) ?
93       data[viewport_index_output * 4] : 0;
94 
95    viewport_index = draw_clamp_viewport_idx(viewport_index);
96 
97    return &draw->viewports[viewport_index];
98 }
99 
100 
101 /* Mainly for debug at this stage:
102  */
do_rhw_viewport(struct draw_vs_variant_generic * vsvg,unsigned count,void * output_buffer)103 static void do_rhw_viewport( struct draw_vs_variant_generic *vsvg,
104                              unsigned count,
105                              void *output_buffer )
106 {
107    char *ptr = (char *)output_buffer;
108    unsigned stride = vsvg->temp_vertex_stride;
109    unsigned j;
110 
111    ptr += vsvg->base.vs->position_output * 4 * sizeof(float);
112 
113    for (j = 0; j < count; j++, ptr += stride) {
114       const struct pipe_viewport_state *viewport =
115          find_viewport(vsvg->base.vs->draw, (char*)output_buffer,
116                        j, stride);
117       const float *scale = viewport->scale;
118       const float *trans = viewport->translate;
119       float *data = (float *)ptr;
120       float w = 1.0f / data[3];
121 
122       data[0] = data[0] * w * scale[0] + trans[0];
123       data[1] = data[1] * w * scale[1] + trans[1];
124       data[2] = data[2] * w * scale[2] + trans[2];
125       data[3] = w;
126    }
127 }
128 
do_viewport(struct draw_vs_variant_generic * vsvg,unsigned count,void * output_buffer)129 static void do_viewport( struct draw_vs_variant_generic *vsvg,
130                          unsigned count,
131                          void *output_buffer )
132 {
133    char *ptr = (char *)output_buffer;
134    unsigned stride = vsvg->temp_vertex_stride;
135    unsigned j;
136 
137    ptr += vsvg->base.vs->position_output * 4 * sizeof(float);
138 
139    for (j = 0; j < count; j++, ptr += stride) {
140       const struct pipe_viewport_state *viewport =
141          find_viewport(vsvg->base.vs->draw, (char*)output_buffer,
142                        j, stride);
143       const float *scale = viewport->scale;
144       const float *trans = viewport->translate;
145       float *data = (float *)ptr;
146 
147       data[0] = data[0] * scale[0] + trans[0];
148       data[1] = data[1] * scale[1] + trans[1];
149       data[2] = data[2] * scale[2] + trans[2];
150    }
151 }
152 
153 
vsvg_run_elts(struct draw_vs_variant * variant,const unsigned * elts,unsigned count,void * output_buffer)154 static void PIPE_CDECL vsvg_run_elts( struct draw_vs_variant *variant,
155                                       const unsigned *elts,
156                                       unsigned count,
157                                       void *output_buffer)
158 {
159    struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant;
160    unsigned temp_vertex_stride = vsvg->temp_vertex_stride;
161    void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride );
162 
163    if (0) debug_printf("%s %d \n", __FUNCTION__,  count);
164 
165    /* Want to do this in small batches for cache locality?
166     */
167 
168    vsvg->fetch->run_elts( vsvg->fetch,
169                           elts,
170                           count,
171                           vsvg->draw->start_instance,
172                           vsvg->draw->instance_id,
173                           temp_buffer );
174 
175    vsvg->base.vs->run_linear( vsvg->base.vs,
176                               temp_buffer,
177                               temp_buffer,
178                               vsvg->base.vs->draw->pt.user.vs_constants,
179                               vsvg->base.vs->draw->pt.user.vs_constants_size,
180                               count,
181                               temp_vertex_stride,
182                               temp_vertex_stride);
183 
184    /* FIXME: geometry shading? */
185 
186    if (vsvg->base.key.clip) {
187       /* not really handling clipping, just do the rhw so we can
188        * see the results...
189        */
190       do_rhw_viewport( vsvg,
191                        count,
192                        temp_buffer );
193    }
194    else if (vsvg->base.key.viewport) {
195       do_viewport( vsvg,
196                    count,
197                    temp_buffer );
198    }
199 
200 
201    vsvg->emit->set_buffer( vsvg->emit,
202                            0,
203                            temp_buffer,
204                            temp_vertex_stride,
205                            ~0 );
206 
207    vsvg->emit->set_buffer( vsvg->emit,
208                            1,
209                            &vsvg->draw->rasterizer->point_size,
210                            0,
211                            ~0 );
212 
213    vsvg->emit->run( vsvg->emit,
214                     0, count,
215                     vsvg->draw->start_instance,
216                     vsvg->draw->instance_id,
217                     output_buffer );
218 
219    FREE(temp_buffer);
220 }
221 
222 
vsvg_run_linear(struct draw_vs_variant * variant,unsigned start,unsigned count,void * output_buffer)223 static void PIPE_CDECL vsvg_run_linear( struct draw_vs_variant *variant,
224                                         unsigned start,
225                                         unsigned count,
226                                         void *output_buffer )
227 {
228    struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant;
229    unsigned temp_vertex_stride = vsvg->temp_vertex_stride;
230    void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride );
231 
232    if (0) debug_printf("%s %d %d (sz %d, %d)\n", __FUNCTION__, start, count,
233                        vsvg->base.key.output_stride,
234                        temp_vertex_stride);
235 
236    vsvg->fetch->run( vsvg->fetch,
237                      start,
238                      count,
239                      vsvg->draw->start_instance,
240                      vsvg->draw->instance_id,
241                      temp_buffer );
242 
243    vsvg->base.vs->run_linear( vsvg->base.vs,
244                               temp_buffer,
245                               temp_buffer,
246                               vsvg->base.vs->draw->pt.user.vs_constants,
247                               vsvg->base.vs->draw->pt.user.vs_constants_size,
248                               count,
249                               temp_vertex_stride,
250                               temp_vertex_stride);
251 
252    if (vsvg->base.key.clip) {
253       /* not really handling clipping, just do the rhw so we can
254        * see the results...
255        */
256       do_rhw_viewport( vsvg,
257                        count,
258                        temp_buffer );
259    }
260    else if (vsvg->base.key.viewport) {
261       do_viewport( vsvg,
262                    count,
263                    temp_buffer );
264    }
265 
266    vsvg->emit->set_buffer( vsvg->emit,
267                            0,
268                            temp_buffer,
269                            temp_vertex_stride,
270                            ~0 );
271 
272    vsvg->emit->set_buffer( vsvg->emit,
273                            1,
274                            &vsvg->draw->rasterizer->point_size,
275                            0,
276                            ~0 );
277 
278    vsvg->emit->run( vsvg->emit,
279                     0, count,
280                     vsvg->draw->start_instance,
281                     vsvg->draw->instance_id,
282                     output_buffer );
283 
284    FREE(temp_buffer);
285 }
286 
287 
288 
289 
290 
/**
 * destroy callback: release the variant structure.
 *
 * Only the variant's own allocation is freed; the fetch and emit
 * translate objects it points to are not released here.
 *
 * \param variant  variant whose concrete type is draw_vs_variant_generic
 */
static void
vsvg_destroy(struct draw_vs_variant *variant)
{
   struct draw_vs_variant_generic *generic =
      (struct draw_vs_variant_generic *)variant;

   FREE(generic);
}
295 
296 
297 struct draw_vs_variant *
draw_vs_create_variant_generic(struct draw_vertex_shader * vs,const struct draw_vs_variant_key * key)298 draw_vs_create_variant_generic( struct draw_vertex_shader *vs,
299                                 const struct draw_vs_variant_key *key )
300 {
301    unsigned i;
302    struct translate_key fetch, emit;
303 
304    struct draw_vs_variant_generic *vsvg = CALLOC_STRUCT( draw_vs_variant_generic );
305    if (!vsvg)
306       return NULL;
307 
308    vsvg->base.key = *key;
309    vsvg->base.vs = vs;
310    vsvg->base.set_buffer    = vsvg_set_buffer;
311    vsvg->base.run_elts      = vsvg_run_elts;
312    vsvg->base.run_linear    = vsvg_run_linear;
313    vsvg->base.destroy       = vsvg_destroy;
314 
315    vsvg->draw = vs->draw;
316 
317    vsvg->temp_vertex_stride = MAX2(key->nr_inputs,
318                                    draw_total_vs_outputs(vs->draw)) * 4 * sizeof(float);
319 
320    /* Build free-standing fetch and emit functions:
321     */
322    fetch.nr_elements = key->nr_inputs;
323    fetch.output_stride = vsvg->temp_vertex_stride;
324    for (i = 0; i < key->nr_inputs; i++) {
325       fetch.element[i].type = TRANSLATE_ELEMENT_NORMAL;
326       fetch.element[i].input_format = key->element[i].in.format;
327       fetch.element[i].input_buffer = key->element[i].in.buffer;
328       fetch.element[i].input_offset = key->element[i].in.offset;
329       fetch.element[i].instance_divisor = 0;
330       fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
331       fetch.element[i].output_offset = i * 4 * sizeof(float);
332       assert(fetch.element[i].output_offset < fetch.output_stride);
333    }
334 
335 
336    emit.nr_elements = key->nr_outputs;
337    emit.output_stride = key->output_stride;
338    for (i = 0; i < key->nr_outputs; i++) {
339       if (key->element[i].out.format != EMIT_1F_PSIZE)
340       {
341          emit.element[i].type = TRANSLATE_ELEMENT_NORMAL;
342          emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
343          emit.element[i].input_buffer = 0;
344          emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float);
345          emit.element[i].instance_divisor = 0;
346          emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format);
347          emit.element[i].output_offset = key->element[i].out.offset;
348          assert(emit.element[i].input_offset <= fetch.output_stride);
349       }
350       else {
351          emit.element[i].type = TRANSLATE_ELEMENT_NORMAL;
352          emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
353          emit.element[i].input_buffer = 1;
354          emit.element[i].input_offset = 0;
355          emit.element[i].instance_divisor = 0;
356          emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT;
357          emit.element[i].output_offset = key->element[i].out.offset;
358       }
359    }
360 
361    vsvg->fetch = draw_vs_get_fetch( vs->draw, &fetch );
362    vsvg->emit = draw_vs_get_emit( vs->draw, &emit );
363 
364    return &vsvg->base;
365 }
366 
367 
368 
369 
370 
371