1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Keith Whitwell <keith@tungstengraphics.com>
31 */
32
33 #include "util/u_memory.h"
34 #include "util/u_math.h"
35 #include "draw/draw_context.h"
36 #include "draw/draw_private.h"
37 #include "draw/draw_vbuf.h"
38 #include "draw/draw_vertex.h"
39 #include "draw/draw_vs.h"
40 #include "translate/translate.h"
41
/* A first pass at incorporating vertex fetch/emit functionality into
 * the vertex shader.
 */
44 struct draw_vs_variant_generic {
45 struct draw_vs_variant base;
46
47 struct draw_vertex_shader *shader;
48 struct draw_context *draw;
49
50 /* Basic plan is to run these two translate functions before/after
51 * the vertex shader's existing run_linear() routine to simulate
52 * the inclusion of this functionality into the shader...
53 *
54 * Next will look at actually including it.
55 */
56 struct translate *fetch;
57 struct translate *emit;
58
59 unsigned temp_vertex_stride;
60 };
61
62
63
64
65
vsvg_set_buffer(struct draw_vs_variant * variant,unsigned buffer,const void * ptr,unsigned stride,unsigned max_index)66 static void vsvg_set_buffer( struct draw_vs_variant *variant,
67 unsigned buffer,
68 const void *ptr,
69 unsigned stride,
70 unsigned max_index )
71 {
72 struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant;
73
74 vsvg->fetch->set_buffer(vsvg->fetch,
75 buffer,
76 ptr,
77 stride,
78 max_index );
79 }
80
81
82 /* Mainly for debug at this stage:
83 */
do_rhw_viewport(struct draw_vs_variant_generic * vsvg,unsigned count,void * output_buffer)84 static void do_rhw_viewport( struct draw_vs_variant_generic *vsvg,
85 unsigned count,
86 void *output_buffer )
87 {
88 char *ptr = (char *)output_buffer;
89 const float *scale = vsvg->base.vs->draw->viewport.scale;
90 const float *trans = vsvg->base.vs->draw->viewport.translate;
91 unsigned stride = vsvg->temp_vertex_stride;
92 unsigned j;
93
94 ptr += vsvg->base.vs->position_output * 4 * sizeof(float);
95
96 for (j = 0; j < count; j++, ptr += stride) {
97 float *data = (float *)ptr;
98 float w = 1.0f / data[3];
99
100 data[0] = data[0] * w * scale[0] + trans[0];
101 data[1] = data[1] * w * scale[1] + trans[1];
102 data[2] = data[2] * w * scale[2] + trans[2];
103 data[3] = w;
104 }
105 }
106
do_viewport(struct draw_vs_variant_generic * vsvg,unsigned count,void * output_buffer)107 static void do_viewport( struct draw_vs_variant_generic *vsvg,
108 unsigned count,
109 void *output_buffer )
110 {
111 char *ptr = (char *)output_buffer;
112 const float *scale = vsvg->base.vs->draw->viewport.scale;
113 const float *trans = vsvg->base.vs->draw->viewport.translate;
114 unsigned stride = vsvg->temp_vertex_stride;
115 unsigned j;
116
117 ptr += vsvg->base.vs->position_output * 4 * sizeof(float);
118
119 for (j = 0; j < count; j++, ptr += stride) {
120 float *data = (float *)ptr;
121
122 data[0] = data[0] * scale[0] + trans[0];
123 data[1] = data[1] * scale[1] + trans[1];
124 data[2] = data[2] * scale[2] + trans[2];
125 }
126 }
127
128
vsvg_run_elts(struct draw_vs_variant * variant,const unsigned * elts,unsigned count,void * output_buffer)129 static void PIPE_CDECL vsvg_run_elts( struct draw_vs_variant *variant,
130 const unsigned *elts,
131 unsigned count,
132 void *output_buffer)
133 {
134 struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant;
135 unsigned temp_vertex_stride = vsvg->temp_vertex_stride;
136 void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride );
137
138 if (0) debug_printf("%s %d \n", __FUNCTION__, count);
139
140 /* Want to do this in small batches for cache locality?
141 */
142
143 vsvg->fetch->run_elts( vsvg->fetch,
144 elts,
145 count,
146 vsvg->draw->instance_id,
147 temp_buffer );
148
149 vsvg->base.vs->run_linear( vsvg->base.vs,
150 temp_buffer,
151 temp_buffer,
152 vsvg->base.vs->draw->pt.user.vs_constants,
153 vsvg->base.vs->draw->pt.user.vs_constants_size,
154 count,
155 temp_vertex_stride,
156 temp_vertex_stride);
157
158 /* FIXME: geometry shading? */
159
160 if (vsvg->base.key.clip) {
161 /* not really handling clipping, just do the rhw so we can
162 * see the results...
163 */
164 do_rhw_viewport( vsvg,
165 count,
166 temp_buffer );
167 }
168 else if (vsvg->base.key.viewport) {
169 do_viewport( vsvg,
170 count,
171 temp_buffer );
172 }
173
174
175 vsvg->emit->set_buffer( vsvg->emit,
176 0,
177 temp_buffer,
178 temp_vertex_stride,
179 ~0 );
180
181 vsvg->emit->set_buffer( vsvg->emit,
182 1,
183 &vsvg->draw->rasterizer->point_size,
184 0,
185 ~0 );
186
187 vsvg->emit->run( vsvg->emit,
188 0, count,
189 vsvg->draw->instance_id,
190 output_buffer );
191
192 FREE(temp_buffer);
193 }
194
195
vsvg_run_linear(struct draw_vs_variant * variant,unsigned start,unsigned count,void * output_buffer)196 static void PIPE_CDECL vsvg_run_linear( struct draw_vs_variant *variant,
197 unsigned start,
198 unsigned count,
199 void *output_buffer )
200 {
201 struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant;
202 unsigned temp_vertex_stride = vsvg->temp_vertex_stride;
203 void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride );
204
205 if (0) debug_printf("%s %d %d (sz %d, %d)\n", __FUNCTION__, start, count,
206 vsvg->base.key.output_stride,
207 temp_vertex_stride);
208
209 vsvg->fetch->run( vsvg->fetch,
210 start,
211 count,
212 vsvg->draw->instance_id,
213 temp_buffer );
214
215 vsvg->base.vs->run_linear( vsvg->base.vs,
216 temp_buffer,
217 temp_buffer,
218 vsvg->base.vs->draw->pt.user.vs_constants,
219 vsvg->base.vs->draw->pt.user.vs_constants_size,
220 count,
221 temp_vertex_stride,
222 temp_vertex_stride);
223
224 if (vsvg->base.key.clip) {
225 /* not really handling clipping, just do the rhw so we can
226 * see the results...
227 */
228 do_rhw_viewport( vsvg,
229 count,
230 temp_buffer );
231 }
232 else if (vsvg->base.key.viewport) {
233 do_viewport( vsvg,
234 count,
235 temp_buffer );
236 }
237
238 vsvg->emit->set_buffer( vsvg->emit,
239 0,
240 temp_buffer,
241 temp_vertex_stride,
242 ~0 );
243
244 vsvg->emit->set_buffer( vsvg->emit,
245 1,
246 &vsvg->draw->rasterizer->point_size,
247 0,
248 ~0 );
249
250 vsvg->emit->run( vsvg->emit,
251 0, count,
252 vsvg->draw->instance_id,
253 output_buffer );
254
255 FREE(temp_buffer);
256 }
257
258
259
260
261
/**
 * destroy hook of the generic variant.
 *
 * Releases the variant's own storage only; the fetch and emit translate
 * objects it points at are not released here.
 */
static void
vsvg_destroy(struct draw_vs_variant *variant)
{
   struct draw_vs_variant_generic *self =
      (struct draw_vs_variant_generic *) variant;

   FREE(self);
}
266
267
268 struct draw_vs_variant *
draw_vs_create_variant_generic(struct draw_vertex_shader * vs,const struct draw_vs_variant_key * key)269 draw_vs_create_variant_generic( struct draw_vertex_shader *vs,
270 const struct draw_vs_variant_key *key )
271 {
272 unsigned i;
273 struct translate_key fetch, emit;
274
275 struct draw_vs_variant_generic *vsvg = CALLOC_STRUCT( draw_vs_variant_generic );
276 if (vsvg == NULL)
277 return NULL;
278
279 vsvg->base.key = *key;
280 vsvg->base.vs = vs;
281 vsvg->base.set_buffer = vsvg_set_buffer;
282 vsvg->base.run_elts = vsvg_run_elts;
283 vsvg->base.run_linear = vsvg_run_linear;
284 vsvg->base.destroy = vsvg_destroy;
285
286 vsvg->draw = vs->draw;
287
288 vsvg->temp_vertex_stride = MAX2(key->nr_inputs,
289 vsvg->base.vs->info.num_outputs) * 4 * sizeof(float);
290
291 /* Build free-standing fetch and emit functions:
292 */
293 fetch.nr_elements = key->nr_inputs;
294 fetch.output_stride = vsvg->temp_vertex_stride;
295 for (i = 0; i < key->nr_inputs; i++) {
296 fetch.element[i].type = TRANSLATE_ELEMENT_NORMAL;
297 fetch.element[i].input_format = key->element[i].in.format;
298 fetch.element[i].input_buffer = key->element[i].in.buffer;
299 fetch.element[i].input_offset = key->element[i].in.offset;
300 fetch.element[i].instance_divisor = 0;
301 fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
302 fetch.element[i].output_offset = i * 4 * sizeof(float);
303 assert(fetch.element[i].output_offset < fetch.output_stride);
304 }
305
306
307 emit.nr_elements = key->nr_outputs;
308 emit.output_stride = key->output_stride;
309 for (i = 0; i < key->nr_outputs; i++) {
310 if (key->element[i].out.format != EMIT_1F_PSIZE)
311 {
312 emit.element[i].type = TRANSLATE_ELEMENT_NORMAL;
313 emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
314 emit.element[i].input_buffer = 0;
315 emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float);
316 emit.element[i].instance_divisor = 0;
317 emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format);
318 emit.element[i].output_offset = key->element[i].out.offset;
319 assert(emit.element[i].input_offset <= fetch.output_stride);
320 }
321 else {
322 emit.element[i].type = TRANSLATE_ELEMENT_NORMAL;
323 emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
324 emit.element[i].input_buffer = 1;
325 emit.element[i].input_offset = 0;
326 emit.element[i].instance_divisor = 0;
327 emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT;
328 emit.element[i].output_offset = key->element[i].out.offset;
329 }
330 }
331
332 vsvg->fetch = draw_vs_get_fetch( vs->draw, &fetch );
333 vsvg->emit = draw_vs_get_emit( vs->draw, &emit );
334
335 return &vsvg->base;
336 }
337
338
339
340
341
342