1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "main/glheader.h"
25 #include "main/context.h"
26 #include "main/macros.h"
27 #include "program.h"
28 #include "prog_instruction.h"
29 #include "prog_optimize.h"
30 #include "prog_parameter.h"
31 #include <stdbool.h>
32 
33 static bool
src_regs_are_constant(const struct prog_instruction * inst,unsigned num_srcs)34 src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs)
35 {
36    unsigned i;
37 
38    for (i = 0; i < num_srcs; i++) {
39       if (inst->SrcReg[i].File != PROGRAM_CONSTANT)
40 	 return false;
41       if (inst->SrcReg[i].RelAddr)
42          return false;
43    }
44 
45    return true;
46 }
47 
48 static struct prog_src_register
src_reg_for_float(struct gl_program * prog,float val)49 src_reg_for_float(struct gl_program *prog, float val)
50 {
51    struct prog_src_register src;
52    unsigned swiz;
53 
54    memset(&src, 0, sizeof(src));
55 
56    src.File = PROGRAM_CONSTANT;
57    src.Index = _mesa_add_unnamed_constant(prog->Parameters,
58 					  (gl_constant_value *) &val, 1, &swiz);
59    src.Swizzle = swiz;
60    return src;
61 }
62 
63 static struct prog_src_register
src_reg_for_vec4(struct gl_program * prog,const float * val)64 src_reg_for_vec4(struct gl_program *prog, const float *val)
65 {
66    struct prog_src_register src;
67    unsigned swiz;
68 
69    memset(&src, 0, sizeof(src));
70 
71    src.File = PROGRAM_CONSTANT;
72    src.Index = _mesa_add_unnamed_constant(prog->Parameters,
73 					  (gl_constant_value *) val, 4, &swiz);
74    src.Swizzle = swiz;
75    return src;
76 }
77 
78 static bool
src_regs_are_same(const struct prog_src_register * a,const struct prog_src_register * b)79 src_regs_are_same(const struct prog_src_register *a,
80 		  const struct prog_src_register *b)
81 {
82    return (a->File == b->File)
83       && (a->Index == b->Index)
84       && (a->Swizzle == b->Swizzle)
85       && (a->Negate == b->Negate)
86       && (a->RelAddr == 0)
87       && (b->RelAddr == 0);
88 }
89 
90 static void
get_value(struct gl_program * prog,struct prog_src_register * r,float * data)91 get_value(struct gl_program *prog, struct prog_src_register *r, float *data)
92 {
93    unsigned pvo = prog->Parameters->ParameterValueOffset[r->Index];
94    const gl_constant_value *const value =
95       prog->Parameters->ParameterValues + pvo;
96 
97    data[0] = value[GET_SWZ(r->Swizzle, 0)].f;
98    data[1] = value[GET_SWZ(r->Swizzle, 1)].f;
99    data[2] = value[GET_SWZ(r->Swizzle, 2)].f;
100    data[3] = value[GET_SWZ(r->Swizzle, 3)].f;
101 
102    if (r->Negate & 0x01) {
103       data[0] = -data[0];
104    }
105 
106    if (r->Negate & 0x02) {
107       data[1] = -data[1];
108    }
109 
110    if (r->Negate & 0x04) {
111       data[2] = -data[2];
112    }
113 
114    if (r->Negate & 0x08) {
115       data[3] = -data[3];
116    }
117 }
118 
119 /**
120  * Try to replace instructions that produce a constant result with simple moves
121  *
122  * The hope is that a following copy propagation pass will eliminate the
123  * unnecessary move instructions.
124  */
125 GLboolean
_mesa_constant_fold(struct gl_program * prog)126 _mesa_constant_fold(struct gl_program *prog)
127 {
128    bool progress = false;
129    unsigned i;
130 
131    for (i = 0; i < prog->arb.NumInstructions; i++) {
132       struct prog_instruction *const inst = &prog->arb.Instructions[i];
133 
134       switch (inst->Opcode) {
135       case OPCODE_ADD:
136 	 if (src_regs_are_constant(inst, 2)) {
137 	    float a[4];
138 	    float b[4];
139 	    float result[4];
140 
141 	    get_value(prog, &inst->SrcReg[0], a);
142 	    get_value(prog, &inst->SrcReg[1], b);
143 
144 	    result[0] = a[0] + b[0];
145 	    result[1] = a[1] + b[1];
146 	    result[2] = a[2] + b[2];
147 	    result[3] = a[3] + b[3];
148 
149 	    inst->Opcode = OPCODE_MOV;
150 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
151 
152 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
153 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
154 
155 	    progress = true;
156 	 }
157 	 break;
158 
159       case OPCODE_CMP:
160 	 /* FINISHME: We could also optimize CMP instructions where the first
161 	  * FINISHME: source is a constant that is either all < 0.0 or all
162 	  * FINISHME: >= 0.0.
163 	  */
164 	 if (src_regs_are_constant(inst, 3)) {
165 	    float a[4];
166 	    float b[4];
167 	    float c[4];
168 	    float result[4];
169 
170 	    get_value(prog, &inst->SrcReg[0], a);
171 	    get_value(prog, &inst->SrcReg[1], b);
172 	    get_value(prog, &inst->SrcReg[2], c);
173 
174             result[0] = a[0] < 0.0f ? b[0] : c[0];
175             result[1] = a[1] < 0.0f ? b[1] : c[1];
176             result[2] = a[2] < 0.0f ? b[2] : c[2];
177             result[3] = a[3] < 0.0f ? b[3] : c[3];
178 
179 	    inst->Opcode = OPCODE_MOV;
180 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
181 
182 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
183 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
184 	    inst->SrcReg[2].File = PROGRAM_UNDEFINED;
185 	    inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
186 
187 	    progress = true;
188 	 }
189 	 break;
190 
191       case OPCODE_DP2:
192       case OPCODE_DP3:
193       case OPCODE_DP4:
194 	 if (src_regs_are_constant(inst, 2)) {
195 	    float a[4];
196 	    float b[4];
197 	    float result;
198 
199 	    get_value(prog, &inst->SrcReg[0], a);
200 	    get_value(prog, &inst->SrcReg[1], b);
201 
202 	    result = (a[0] * b[0]) + (a[1] * b[1]);
203 
204 	    if (inst->Opcode >= OPCODE_DP3)
205 	       result += a[2] * b[2];
206 
207 	    if (inst->Opcode == OPCODE_DP4)
208 	       result += a[3] * b[3];
209 
210 	    inst->Opcode = OPCODE_MOV;
211 	    inst->SrcReg[0] = src_reg_for_float(prog, result);
212 
213 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
214 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
215 
216 	    progress = true;
217 	 }
218 	 break;
219 
220       case OPCODE_MUL:
221 	 if (src_regs_are_constant(inst, 2)) {
222 	    float a[4];
223 	    float b[4];
224 	    float result[4];
225 
226 	    get_value(prog, &inst->SrcReg[0], a);
227 	    get_value(prog, &inst->SrcReg[1], b);
228 
229 	    result[0] = a[0] * b[0];
230 	    result[1] = a[1] * b[1];
231 	    result[2] = a[2] * b[2];
232 	    result[3] = a[3] * b[3];
233 
234 	    inst->Opcode = OPCODE_MOV;
235 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
236 
237 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
238 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
239 
240 	    progress = true;
241 	 }
242 	 break;
243 
244       case OPCODE_SGE:
245 	 if (src_regs_are_constant(inst, 2)) {
246 	    float a[4];
247 	    float b[4];
248 	    float result[4];
249 
250 	    get_value(prog, &inst->SrcReg[0], a);
251 	    get_value(prog, &inst->SrcReg[1], b);
252 
253 	    result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f;
254 	    result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f;
255 	    result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f;
256 	    result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f;
257 
258 	    inst->Opcode = OPCODE_MOV;
259 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
260 
261 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
262 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
263 
264 	    progress = true;
265 	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
266 	    inst->Opcode = OPCODE_MOV;
267 	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
268 
269 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
270 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
271 
272 	    progress = true;
273 	 }
274 	 break;
275 
276       case OPCODE_SLT:
277 	 if (src_regs_are_constant(inst, 2)) {
278 	    float a[4];
279 	    float b[4];
280 	    float result[4];
281 
282 	    get_value(prog, &inst->SrcReg[0], a);
283 	    get_value(prog, &inst->SrcReg[1], b);
284 
285 	    result[0] = (a[0] < b[0]) ? 1.0f : 0.0f;
286 	    result[1] = (a[1] < b[1]) ? 1.0f : 0.0f;
287 	    result[2] = (a[2] < b[2]) ? 1.0f : 0.0f;
288 	    result[3] = (a[3] < b[3]) ? 1.0f : 0.0f;
289 
290 	    inst->Opcode = OPCODE_MOV;
291 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
292 
293 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
294 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
295 
296 	    progress = true;
297 	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
298 	    inst->Opcode = OPCODE_MOV;
299 	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
300 
301 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
302 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
303 
304 	    progress = true;
305 	 }
306 	 break;
307 
308       default:
309 	 break;
310       }
311    }
312 
313    return progress;
314 }
315