1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "main/glheader.h"
25 #include "main/context.h"
26 #include "main/macros.h"
27 #include "program.h"
28 #include "prog_instruction.h"
29 #include "prog_optimize.h"
30 #include "prog_parameter.h"
31 #include <stdbool.h>
32 
33 static bool
src_regs_are_constant(const struct prog_instruction * inst,unsigned num_srcs)34 src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs)
35 {
36    unsigned i;
37 
38    for (i = 0; i < num_srcs; i++) {
39       if (inst->SrcReg[i].File != PROGRAM_CONSTANT)
40 	 return false;
41    }
42 
43    return true;
44 }
45 
46 static struct prog_src_register
src_reg_for_float(struct gl_program * prog,float val)47 src_reg_for_float(struct gl_program *prog, float val)
48 {
49    struct prog_src_register src;
50    unsigned swiz;
51 
52    memset(&src, 0, sizeof(src));
53 
54    src.File = PROGRAM_CONSTANT;
55    src.Index = _mesa_add_unnamed_constant(prog->Parameters,
56 					  (gl_constant_value *) &val, 1, &swiz);
57    src.Swizzle = swiz;
58    return src;
59 }
60 
61 static struct prog_src_register
src_reg_for_vec4(struct gl_program * prog,const float * val)62 src_reg_for_vec4(struct gl_program *prog, const float *val)
63 {
64    struct prog_src_register src;
65    unsigned swiz;
66 
67    memset(&src, 0, sizeof(src));
68 
69    src.File = PROGRAM_CONSTANT;
70    src.Index = _mesa_add_unnamed_constant(prog->Parameters,
71 					  (gl_constant_value *) val, 4, &swiz);
72    src.Swizzle = swiz;
73    return src;
74 }
75 
76 static bool
src_regs_are_same(const struct prog_src_register * a,const struct prog_src_register * b)77 src_regs_are_same(const struct prog_src_register *a,
78 		  const struct prog_src_register *b)
79 {
80    return (a->File == b->File)
81       && (a->Index == b->Index)
82       && (a->Swizzle == b->Swizzle)
83       && (a->Abs == b->Abs)
84       && (a->Negate == b->Negate)
85       && (a->RelAddr == 0)
86       && (b->RelAddr == 0);
87 }
88 
89 static void
get_value(struct gl_program * prog,struct prog_src_register * r,float * data)90 get_value(struct gl_program *prog, struct prog_src_register *r, float *data)
91 {
92    const gl_constant_value *const value =
93       prog->Parameters->ParameterValues[r->Index];
94 
95    data[0] = value[GET_SWZ(r->Swizzle, 0)].f;
96    data[1] = value[GET_SWZ(r->Swizzle, 1)].f;
97    data[2] = value[GET_SWZ(r->Swizzle, 2)].f;
98    data[3] = value[GET_SWZ(r->Swizzle, 3)].f;
99 
100    if (r->Abs) {
101       data[0] = fabsf(data[0]);
102       data[1] = fabsf(data[1]);
103       data[2] = fabsf(data[2]);
104       data[3] = fabsf(data[3]);
105    }
106 
107    if (r->Negate & 0x01) {
108       data[0] = -data[0];
109    }
110 
111    if (r->Negate & 0x02) {
112       data[1] = -data[1];
113    }
114 
115    if (r->Negate & 0x04) {
116       data[2] = -data[2];
117    }
118 
119    if (r->Negate & 0x08) {
120       data[3] = -data[3];
121    }
122 }
123 
124 /**
125  * Try to replace instructions that produce a constant result with simple moves
126  *
127  * The hope is that a following copy propagation pass will eliminate the
128  * unnecessary move instructions.
129  */
130 GLboolean
_mesa_constant_fold(struct gl_program * prog)131 _mesa_constant_fold(struct gl_program *prog)
132 {
133    bool progress = false;
134    unsigned i;
135 
136    for (i = 0; i < prog->NumInstructions; i++) {
137       struct prog_instruction *const inst = &prog->Instructions[i];
138 
139       switch (inst->Opcode) {
140       case OPCODE_ADD:
141 	 if (src_regs_are_constant(inst, 2)) {
142 	    float a[4];
143 	    float b[4];
144 	    float result[4];
145 
146 	    get_value(prog, &inst->SrcReg[0], a);
147 	    get_value(prog, &inst->SrcReg[1], b);
148 
149 	    result[0] = a[0] + b[0];
150 	    result[1] = a[1] + b[1];
151 	    result[2] = a[2] + b[2];
152 	    result[3] = a[3] + b[3];
153 
154 	    inst->Opcode = OPCODE_MOV;
155 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
156 
157 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
158 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
159 
160 	    progress = true;
161 	 }
162 	 break;
163 
164       case OPCODE_CMP:
165 	 /* FINISHME: We could also optimize CMP instructions where the first
166 	  * FINISHME: source is a constant that is either all < 0.0 or all
167 	  * FINISHME: >= 0.0.
168 	  */
169 	 if (src_regs_are_constant(inst, 3)) {
170 	    float a[4];
171 	    float b[4];
172 	    float c[4];
173 	    float result[4];
174 
175 	    get_value(prog, &inst->SrcReg[0], a);
176 	    get_value(prog, &inst->SrcReg[1], b);
177 	    get_value(prog, &inst->SrcReg[2], c);
178 
179             result[0] = a[0] < 0.0f ? b[0] : c[0];
180             result[1] = a[1] < 0.0f ? b[1] : c[1];
181             result[2] = a[2] < 0.0f ? b[2] : c[2];
182             result[3] = a[3] < 0.0f ? b[3] : c[3];
183 
184 	    inst->Opcode = OPCODE_MOV;
185 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
186 
187 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
188 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
189 	    inst->SrcReg[2].File = PROGRAM_UNDEFINED;
190 	    inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
191 
192 	    progress = true;
193 	 }
194 	 break;
195 
196       case OPCODE_DP2:
197       case OPCODE_DP3:
198       case OPCODE_DP4:
199 	 if (src_regs_are_constant(inst, 2)) {
200 	    float a[4];
201 	    float b[4];
202 	    float result;
203 
204 	    get_value(prog, &inst->SrcReg[0], a);
205 	    get_value(prog, &inst->SrcReg[1], b);
206 
207 	    /* It seems like a loop could be used here, but we cleverly put
208 	     * DP2A between DP2 and DP3.  Subtracting DP2 (or similar) from
209 	     * the opcode results in various failures of the loop control.
210 	     */
211 	    result = (a[0] * b[0]) + (a[1] * b[1]);
212 
213 	    if (inst->Opcode >= OPCODE_DP3)
214 	       result += a[2] * b[2];
215 
216 	    if (inst->Opcode == OPCODE_DP4)
217 	       result += a[3] * b[3];
218 
219 	    inst->Opcode = OPCODE_MOV;
220 	    inst->SrcReg[0] = src_reg_for_float(prog, result);
221 
222 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
223 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
224 
225 	    progress = true;
226 	 }
227 	 break;
228 
229       case OPCODE_MUL:
230 	 if (src_regs_are_constant(inst, 2)) {
231 	    float a[4];
232 	    float b[4];
233 	    float result[4];
234 
235 	    get_value(prog, &inst->SrcReg[0], a);
236 	    get_value(prog, &inst->SrcReg[1], b);
237 
238 	    result[0] = a[0] * b[0];
239 	    result[1] = a[1] * b[1];
240 	    result[2] = a[2] * b[2];
241 	    result[3] = a[3] * b[3];
242 
243 	    inst->Opcode = OPCODE_MOV;
244 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
245 
246 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
247 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
248 
249 	    progress = true;
250 	 }
251 	 break;
252 
253       case OPCODE_SEQ:
254 	 if (src_regs_are_constant(inst, 2)) {
255 	    float a[4];
256 	    float b[4];
257 	    float result[4];
258 
259 	    get_value(prog, &inst->SrcReg[0], a);
260 	    get_value(prog, &inst->SrcReg[1], b);
261 
262 	    result[0] = (a[0] == b[0]) ? 1.0f : 0.0f;
263 	    result[1] = (a[1] == b[1]) ? 1.0f : 0.0f;
264 	    result[2] = (a[2] == b[2]) ? 1.0f : 0.0f;
265 	    result[3] = (a[3] == b[3]) ? 1.0f : 0.0f;
266 
267 	    inst->Opcode = OPCODE_MOV;
268 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
269 
270 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
271 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
272 
273 	    progress = true;
274 	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
275 	    inst->Opcode = OPCODE_MOV;
276 	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
277 
278 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
279 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
280 
281 	    progress = true;
282 	 }
283 	 break;
284 
285       case OPCODE_SGE:
286 	 if (src_regs_are_constant(inst, 2)) {
287 	    float a[4];
288 	    float b[4];
289 	    float result[4];
290 
291 	    get_value(prog, &inst->SrcReg[0], a);
292 	    get_value(prog, &inst->SrcReg[1], b);
293 
294 	    result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f;
295 	    result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f;
296 	    result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f;
297 	    result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f;
298 
299 	    inst->Opcode = OPCODE_MOV;
300 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
301 
302 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
303 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
304 
305 	    progress = true;
306 	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
307 	    inst->Opcode = OPCODE_MOV;
308 	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
309 
310 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
311 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
312 
313 	    progress = true;
314 	 }
315 	 break;
316 
317       case OPCODE_SGT:
318 	 if (src_regs_are_constant(inst, 2)) {
319 	    float a[4];
320 	    float b[4];
321 	    float result[4];
322 
323 	    get_value(prog, &inst->SrcReg[0], a);
324 	    get_value(prog, &inst->SrcReg[1], b);
325 
326 	    result[0] = (a[0] > b[0]) ? 1.0f : 0.0f;
327 	    result[1] = (a[1] > b[1]) ? 1.0f : 0.0f;
328 	    result[2] = (a[2] > b[2]) ? 1.0f : 0.0f;
329 	    result[3] = (a[3] > b[3]) ? 1.0f : 0.0f;
330 
331 	    inst->Opcode = OPCODE_MOV;
332 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
333 
334 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
335 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
336 
337 	    progress = true;
338 	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
339 	    inst->Opcode = OPCODE_MOV;
340 	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
341 
342 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
343 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
344 
345 	    progress = true;
346 	 }
347 	 break;
348 
349       case OPCODE_SLE:
350 	 if (src_regs_are_constant(inst, 2)) {
351 	    float a[4];
352 	    float b[4];
353 	    float result[4];
354 
355 	    get_value(prog, &inst->SrcReg[0], a);
356 	    get_value(prog, &inst->SrcReg[1], b);
357 
358 	    result[0] = (a[0] <= b[0]) ? 1.0f : 0.0f;
359 	    result[1] = (a[1] <= b[1]) ? 1.0f : 0.0f;
360 	    result[2] = (a[2] <= b[2]) ? 1.0f : 0.0f;
361 	    result[3] = (a[3] <= b[3]) ? 1.0f : 0.0f;
362 
363 	    inst->Opcode = OPCODE_MOV;
364 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
365 
366 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
367 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
368 
369 	    progress = true;
370 	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
371 	    inst->Opcode = OPCODE_MOV;
372 	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
373 
374 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
375 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
376 
377 	    progress = true;
378 	 }
379 	 break;
380 
381       case OPCODE_SLT:
382 	 if (src_regs_are_constant(inst, 2)) {
383 	    float a[4];
384 	    float b[4];
385 	    float result[4];
386 
387 	    get_value(prog, &inst->SrcReg[0], a);
388 	    get_value(prog, &inst->SrcReg[1], b);
389 
390 	    result[0] = (a[0] < b[0]) ? 1.0f : 0.0f;
391 	    result[1] = (a[1] < b[1]) ? 1.0f : 0.0f;
392 	    result[2] = (a[2] < b[2]) ? 1.0f : 0.0f;
393 	    result[3] = (a[3] < b[3]) ? 1.0f : 0.0f;
394 
395 	    inst->Opcode = OPCODE_MOV;
396 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
397 
398 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
399 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
400 
401 	    progress = true;
402 	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
403 	    inst->Opcode = OPCODE_MOV;
404 	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
405 
406 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
407 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
408 
409 	    progress = true;
410 	 }
411 	 break;
412 
413       case OPCODE_SNE:
414 	 if (src_regs_are_constant(inst, 2)) {
415 	    float a[4];
416 	    float b[4];
417 	    float result[4];
418 
419 	    get_value(prog, &inst->SrcReg[0], a);
420 	    get_value(prog, &inst->SrcReg[1], b);
421 
422 	    result[0] = (a[0] != b[0]) ? 1.0f : 0.0f;
423 	    result[1] = (a[1] != b[1]) ? 1.0f : 0.0f;
424 	    result[2] = (a[2] != b[2]) ? 1.0f : 0.0f;
425 	    result[3] = (a[3] != b[3]) ? 1.0f : 0.0f;
426 
427 	    inst->Opcode = OPCODE_MOV;
428 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
429 
430 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
431 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
432 
433 	    progress = true;
434 	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
435 	    inst->Opcode = OPCODE_MOV;
436 	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
437 
438 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
439 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
440 
441 	    progress = true;
442 	 }
443 	 break;
444 
445       default:
446 	 break;
447       }
448    }
449 
450    return progress;
451 }
452