1 /*
2  * Mesa 3-D graphics library
3  *
4  * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Keith Whitwell <keithw@vmware.com>
26  */
27 
28 #include "main/glheader.h"
29 #include "main/context.h"
30 #include "main/imports.h"
31 #include "main/mtypes.h"
32 
33 #include "t_context.h"
34 #include "t_pipeline.h"
35 #include "t_vp_build.h"
36 #include "t_vertex.h"
37 
_tnl_install_pipeline(struct gl_context * ctx,const struct tnl_pipeline_stage ** stages)38 void _tnl_install_pipeline( struct gl_context *ctx,
39 			    const struct tnl_pipeline_stage **stages )
40 {
41    TNLcontext *tnl = TNL_CONTEXT(ctx);
42    GLuint i;
43 
44    tnl->pipeline.new_state = ~0;
45 
46    /* Create a writeable copy of each stage.
47     */
48    for (i = 0 ; i < MAX_PIPELINE_STAGES && stages[i] ; i++) {
49       struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
50       memcpy(s, stages[i], sizeof(*s));
51       if (s->create)
52 	 s->create(ctx, s);
53    }
54 
55    tnl->pipeline.nr_stages = i;
56 }
57 
_tnl_destroy_pipeline(struct gl_context * ctx)58 void _tnl_destroy_pipeline( struct gl_context *ctx )
59 {
60    TNLcontext *tnl = TNL_CONTEXT(ctx);
61    GLuint i;
62 
63    for (i = 0 ; i < tnl->pipeline.nr_stages ; i++) {
64       struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
65       if (s->destroy)
66 	 s->destroy(s);
67    }
68 
69    tnl->pipeline.nr_stages = 0;
70 }
71 
72 
73 
check_input_changes(struct gl_context * ctx)74 static GLuint check_input_changes( struct gl_context *ctx )
75 {
76    TNLcontext *tnl = TNL_CONTEXT(ctx);
77    GLuint i;
78 
79    for (i = 0; i <= _TNL_LAST_MAT; i++) {
80       if (tnl->vb.AttribPtr[i]->size != tnl->pipeline.last_attrib_size[i] ||
81 	  tnl->vb.AttribPtr[i]->stride != tnl->pipeline.last_attrib_stride[i]) {
82 	 tnl->pipeline.last_attrib_size[i] = tnl->vb.AttribPtr[i]->size;
83 	 tnl->pipeline.last_attrib_stride[i] = tnl->vb.AttribPtr[i]->stride;
84 	 tnl->pipeline.input_changes |= 1<<i;
85       }
86    }
87 
88    return tnl->pipeline.input_changes;
89 }
90 
91 
/**
 * Report which pipeline outputs may have changed.
 *
 * The per-result size/stride tracking is compiled out (#if 0), so this
 * currently always returns ~0 and \p ctx is not used.
 *
 * \return ~0 (all bits set).
 */
static GLuint check_output_changes( struct gl_context *ctx )
{
#if 0
   /* NOTE(review): disabled code.  It uses the loop index `i` without
    * declaring it, so it would need fixing before being re-enabled.
    */
   TNLcontext *tnl = TNL_CONTEXT(ctx);

   for (i = 0; i < VARYING_SLOT_MAX; i++) {
      if (tnl->vb.ResultPtr[i]->size != tnl->last_result_size[i] ||
	  tnl->vb.ResultPtr[i]->stride != tnl->last_result_stride[i]) {
	 tnl->last_result_size[i] = tnl->vb.ResultPtr[i]->size;
	 tnl->last_result_stride[i] = tnl->vb.ResultPtr[i]->stride;
	 tnl->pipeline.output_changes |= 1<<i;
      }
   }

   if (tnl->pipeline.output_changes)
      tnl->Driver.NotifyOutputChanges( ctx, tnl->pipeline.output_changes );

   return tnl->pipeline.output_changes;
#else
   /* Conservatively report every output as changed. */
   return ~0;
#endif
}
114 
/**
 * START/END_FAST_MATH macros:
 *
 * START_FAST_MATH: Set x86 FPU to faster, 32-bit precision mode (and save
 *                  original mode to a temporary).
 * END_FAST_MATH: Restore x86 FPU to original mode.
 *
 * Both macros take the same argument \c x: an lvalue in which the original
 * FPU control word is stored by START_FAST_MATH and from which it is
 * reloaded by END_FAST_MATH.
 */
#if defined(__GNUC__) && defined(__i386__)
/*
 * Set the x86 FPU control word to guarantee only 32 bits of precision
 * are stored in registers.  Allowing the FPU to store more introduces
 * differences between situations where numbers are pulled out of memory
 * vs. situations where the compiler is able to optimize register usage.
 *
 * In the worst case, we force the compiler to use a memory access to
 * truncate the float, by specifying the 'volatile' keyword.
 */
/* Hardware default: All exceptions masked, extended double precision,
 * round to nearest (IEEE compliant):
 */
#define DEFAULT_X86_FPU		0x037f
/* All exceptions masked, single precision, round to nearest:
 */
#define FAST_X86_FPU		0x003f
/* The fldcw instruction will cause any pending FP exceptions to be
 * raised prior to entering the block, and we clear any pending
 * exceptions before exiting the block.  Hence, asm code has free
 * rein over the FPU while in the fast math block.
 */
#if defined(NO_FAST_MATH)
/* NO_FAST_MATH: still save the current control word, but load the
 * hardware-default mode instead of the reduced-precision one.
 */
#define START_FAST_MATH(x)						\
do {									\
   static GLuint mask = DEFAULT_X86_FPU;				\
   __asm__ ( "fnstcw %0" : "=m" (*&(x)) );				\
   __asm__ ( "fldcw %0" : : "m" (mask) );				\
} while (0)
#else
/* Save the current control word into x, then load FAST_X86_FPU.
 * NOTE(review): mask is a GLuint while fldcw loads a 16-bit control word;
 * this relies on the low 16 bits being at the operand's address
 * (little-endian x86).
 */
#define START_FAST_MATH(x)						\
do {									\
   static GLuint mask = FAST_X86_FPU;					\
   __asm__ ( "fnstcw %0" : "=m" (*&(x)) );				\
   __asm__ ( "fldcw %0" : : "m" (mask) );				\
} while (0)
#endif
/* Restore original FPU mode, and clear any exceptions that may have
 * occurred in the FAST_MATH block.
 */
#define END_FAST_MATH(x)						\
do {									\
   __asm__ ( "fnclex ; fldcw %0" : : "m" (*&(x)) );			\
} while (0)

#elif defined(_MSC_VER) && defined(_M_IX86)
/* MSVC inline-assembly versions of the same macros for 32-bit x86. */
#define DEFAULT_X86_FPU		0x037f /* See GCC comments above */
#define FAST_X86_FPU		0x003f /* See GCC comments above */
#if defined(NO_FAST_MATH)
#define START_FAST_MATH(x) do {\
	static GLuint mask = DEFAULT_X86_FPU;\
	__asm fnstcw word ptr [x]\
	__asm fldcw word ptr [mask]\
} while(0)
#else
#define START_FAST_MATH(x) do {\
	static GLuint mask = FAST_X86_FPU;\
	__asm fnstcw word ptr [x]\
	__asm fldcw word ptr [mask]\
} while(0)
#endif
#define END_FAST_MATH(x) do {\
	__asm fnclex\
	__asm fldcw word ptr [x]\
} while(0)

#else
/* Any other compiler/architecture: the FPU mode is left alone.
 * START_FAST_MATH just assigns 0 to x, and END_FAST_MATH evaluates x
 * and discards it.
 */
#define START_FAST_MATH(x)  x = 0
#define END_FAST_MATH(x)  (void)(x)
#endif
192 
193 
_tnl_run_pipeline(struct gl_context * ctx)194 void _tnl_run_pipeline( struct gl_context *ctx )
195 {
196    TNLcontext *tnl = TNL_CONTEXT(ctx);
197    unsigned short __tmp;
198    GLuint i;
199 
200    if (!tnl->vb.Count)
201       return;
202 
203    /* Check for changed input sizes or change in stride to/from zero
204     * (ie const or non-const).
205     */
206    if (check_input_changes( ctx ) || tnl->pipeline.new_state) {
207       if (ctx->VertexProgram._MaintainTnlProgram)
208 	 _tnl_UpdateFixedFunctionProgram( ctx );
209 
210       for (i = 0; i < tnl->pipeline.nr_stages ; i++) {
211 	 struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
212 	 if (s->validate)
213 	    s->validate( ctx, s );
214       }
215 
216       tnl->pipeline.new_state = 0;
217       tnl->pipeline.input_changes = 0;
218 
219       /* Pipeline can only change its output in response to either a
220        * statechange or an input size/stride change.  No other changes
221        * are allowed.
222        */
223       if (check_output_changes( ctx ))
224 	 _tnl_notify_pipeline_output_change( ctx );
225    }
226 
227 #ifndef _OPENMP
228    /* Don't adjust FPU precision mode in case multiple threads are to be used.
229     * This would require that the additional threads also changed the FPU mode
230     * which is quite a mess as this had to be done in all parallelized sections;
231     * otherwise the master thread and all other threads are running in different
232     * modes, producing inconsistent results.
233     * Note that all x64 implementations don't define/use START_FAST_MATH, so
234     * this is "hack" is only used in i386 mode
235     */
236    START_FAST_MATH(__tmp);
237 #endif
238 
239    for (i = 0; i < tnl->pipeline.nr_stages ; i++) {
240       struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
241       if (!s->run( ctx, s ))
242 	 break;
243    }
244 
245 #ifndef _OPENMP
246    END_FAST_MATH(__tmp);
247 #endif
248 }
249 
250 
251 
/* The default pipeline.  This is useful for software rasterizers, and
 * simple hardware rasterizers.  For customization, I don't recommend
 * tampering with the internals of these stages in the way that
 * drivers did in Mesa 3.4.  These stages are basically black boxes,
 * and should be left intact.
 *
 * To customize the pipeline, consider:
 *
 * - removing redundant stages (making sure that the software rasterizer
 *   can cope with this on fallback paths).  An example is fog
 *   coordinate generation, which is not required in the FX driver.
 *
 * - replacing general-purpose machine-independent stages with
 *   general-purpose machine-specific stages.  There is no example of
 *   this to date, though it must be borne in mind that all subsequent
 *   stages that reference the output of the new stage must cope with
 *   any machine-specific data introduced.  This may not be easy
 *   unless there are no such stages (ie the new stage is the last in
 *   the pipe).
 *
 * - inserting optimized (but specialized) stages ahead of the
 *   general-purpose fallback implementation.  For example, the old
 *   fastpath mechanism, which only works when the VB->Elts input is
 *   available, can be duplicated by placing the fastpath stage at the
 *   head of this pipeline.  Such specialized stages are currently
 *   constrained to have no outputs (ie. they must either finish the
 *   pipeline by returning GL_FALSE from run(), or do nothing).
 *
 * Some work can be done to lift some of the restrictions in the final
 * case, if it becomes necessary to do so.
 *
 * The array is NULL-terminated, which is how _tnl_install_pipeline()
 * finds its end.
 */
const struct tnl_pipeline_stage *_tnl_default_pipeline[] = {
   &_tnl_vertex_transform_stage,
   &_tnl_normal_transform_stage,
   &_tnl_lighting_stage,
   &_tnl_texgen_stage,
   &_tnl_texture_transform_stage,
   &_tnl_point_attenuation_stage,
   &_tnl_vertex_program_stage,
   &_tnl_fog_coordinate_stage,
   &_tnl_render_stage,
   NULL
};
295 
/* A reduced pipeline consisting only of the vertex program stage followed
 * by the render stage.  NULL-terminated, like the default pipeline.
 */
const struct tnl_pipeline_stage *_tnl_vp_pipeline[] = {
   &_tnl_vertex_program_stage,
   &_tnl_render_stage,
   NULL
};
301