1 /**************************************************************************
2 *
3 * Copyright 2011 The Chromium OS authors.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "i915_reg.h"
29 #include "i915_context.h"
30 #include "i915_fpc.h"
31
32 #include "pipe/p_shader_tokens.h"
33 #include "util/u_math.h"
34 #include "util/u_memory.h"
35 #include "util/u_string.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "tgsi/tgsi_dump.h"
38
same_src_dst_reg(struct i915_full_src_register * s1,struct i915_full_dst_register * d1)39 static boolean same_src_dst_reg(struct i915_full_src_register* s1, struct i915_full_dst_register* d1)
40 {
41 return (s1->Register.File == d1->Register.File &&
42 s1->Register.Indirect == d1->Register.Indirect &&
43 s1->Register.Dimension == d1->Register.Dimension &&
44 s1->Register.Index == d1->Register.Index);
45 }
46
same_dst_reg(struct i915_full_dst_register * d1,struct i915_full_dst_register * d2)47 static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2)
48 {
49 return (d1->Register.File == d2->Register.File &&
50 d1->Register.Indirect == d2->Register.Indirect &&
51 d1->Register.Dimension == d2->Register.Dimension &&
52 d1->Register.Index == d2->Register.Index);
53 }
54
same_src_reg(struct i915_full_src_register * d1,struct i915_full_src_register * d2)55 static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2)
56 {
57 return (d1->Register.File == d2->Register.File &&
58 d1->Register.Indirect == d2->Register.Indirect &&
59 d1->Register.Dimension == d2->Register.Dimension &&
60 d1->Register.Index == d2->Register.Index &&
61 d1->Register.Absolute == d2->Register.Absolute &&
62 d1->Register.Negate == d2->Register.Negate);
63 }
64
has_destination(unsigned opcode)65 static boolean has_destination(unsigned opcode)
66 {
67 return (opcode != TGSI_OPCODE_NOP &&
68 opcode != TGSI_OPCODE_KIL &&
69 opcode != TGSI_OPCODE_KILP &&
70 opcode != TGSI_OPCODE_END &&
71 opcode != TGSI_OPCODE_RET);
72 }
73
is_unswizzled(struct i915_full_src_register * r,unsigned write_mask)74 static boolean is_unswizzled(struct i915_full_src_register* r,
75 unsigned write_mask)
76 {
77 if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
78 return FALSE;
79 if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
80 return FALSE;
81 if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
82 return FALSE;
83 if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W)
84 return FALSE;
85 return TRUE;
86 }
87
op_commutes(unsigned opcode)88 static boolean op_commutes(unsigned opcode)
89 {
90 switch(opcode)
91 {
92 case TGSI_OPCODE_ADD:
93 case TGSI_OPCODE_MUL:
94 case TGSI_OPCODE_DP2:
95 case TGSI_OPCODE_DP3:
96 case TGSI_OPCODE_DP4:
97 return TRUE;
98 }
99 return FALSE;
100 }
101
op_neutral_element(unsigned opcode)102 static unsigned op_neutral_element(unsigned opcode)
103 {
104 switch(opcode)
105 {
106 case TGSI_OPCODE_ADD:
107 return TGSI_SWIZZLE_ZERO;
108 case TGSI_OPCODE_MUL:
109 case TGSI_OPCODE_DP2:
110 case TGSI_OPCODE_DP3:
111 case TGSI_OPCODE_DP4:
112 return TGSI_SWIZZLE_ONE;
113 }
114
115 debug_printf("Unknown opcode %d\n",opcode);
116 return TGSI_SWIZZLE_ZERO;
117 }
118
119 /*
120 * Sets the swizzle to the neutral element for the operation for the bits
121 * of writemask which are set, swizzle to identity otherwise.
122 */
set_neutral_element_swizzle(struct i915_full_src_register * r,unsigned write_mask,unsigned neutral)123 static void set_neutral_element_swizzle(struct i915_full_src_register* r,
124 unsigned write_mask,
125 unsigned neutral)
126 {
127 if ( write_mask & TGSI_WRITEMASK_X )
128 r->Register.SwizzleX = neutral;
129 else
130 r->Register.SwizzleX = TGSI_SWIZZLE_X;
131
132 if ( write_mask & TGSI_WRITEMASK_Y )
133 r->Register.SwizzleY = neutral;
134 else
135 r->Register.SwizzleY = TGSI_SWIZZLE_Y;
136
137 if ( write_mask & TGSI_WRITEMASK_Z )
138 r->Register.SwizzleZ = neutral;
139 else
140 r->Register.SwizzleZ = TGSI_SWIZZLE_Z;
141
142 if ( write_mask & TGSI_WRITEMASK_W )
143 r->Register.SwizzleW = neutral;
144 else
145 r->Register.SwizzleW = TGSI_SWIZZLE_W;
146 }
147
copy_src_reg(struct i915_src_register * o,const struct tgsi_src_register * i)148 static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i)
149 {
150 o->File = i->File;
151 o->Indirect = i->Indirect;
152 o->Dimension = i->Dimension;
153 o->Index = i->Index;
154 o->SwizzleX = i->SwizzleX;
155 o->SwizzleY = i->SwizzleY;
156 o->SwizzleZ = i->SwizzleZ;
157 o->SwizzleW = i->SwizzleW;
158 o->Absolute = i->Absolute;
159 o->Negate = i->Negate;
160 }
161
copy_dst_reg(struct i915_dst_register * o,const struct tgsi_dst_register * i)162 static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i)
163 {
164 o->File = i->File;
165 o->WriteMask = i->WriteMask;
166 o->Indirect = i->Indirect;
167 o->Dimension = i->Dimension;
168 o->Index = i->Index;
169 }
170
copy_instruction(struct i915_full_instruction * o,const struct tgsi_full_instruction * i)171 static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i)
172 {
173 memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
174 memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
175
176 copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
177
178 copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
179 copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
180 copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
181 }
182
copy_token(union i915_full_token * o,union tgsi_full_token * i)183 static void copy_token(union i915_full_token* o, union tgsi_full_token* i)
184 {
185 if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
186 memcpy(o, i, sizeof(*o));
187 else
188 copy_instruction(&o->FullInstruction, &i->FullInstruction);
189
190 }
191
192 /*
193 * Optimize away things like:
194 * MUL OUT[0].xyz, TEMP[1], TEMP[2]
195 * MOV OUT[0].w, TEMP[2]
196 * into:
197 * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
198 * This is useful for optimizing texenv.
199 */
i915_fpc_optimize_mov_after_alu(union i915_full_token * current,union i915_full_token * next)200 static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, union i915_full_token* next)
201 {
202 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
203 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
204 op_commutes(current->FullInstruction.Instruction.Opcode) &&
205 current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
206 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
207 same_dst_reg(&next->FullInstruction.Dst[0], ¤t->FullInstruction.Dst[0]) &&
208 same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[1]) &&
209 !same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) &&
210 is_unswizzled(¤t->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
211 is_unswizzled(¤t->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
212 is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
213 {
214 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
215
216 set_neutral_element_swizzle(¤t->FullInstruction.Src[1], 0, 0);
217 set_neutral_element_swizzle(¤t->FullInstruction.Src[0],
218 next->FullInstruction.Dst[0].Register.WriteMask,
219 op_neutral_element(current->FullInstruction.Instruction.Opcode));
220
221 current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
222 next->FullInstruction.Dst[0].Register.WriteMask;
223 return;
224 }
225
226 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
227 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
228 op_commutes(current->FullInstruction.Instruction.Opcode) &&
229 current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
230 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
231 same_dst_reg(&next->FullInstruction.Dst[0], ¤t->FullInstruction.Dst[0]) &&
232 same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0]) &&
233 !same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) &&
234 is_unswizzled(¤t->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
235 is_unswizzled(¤t->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
236 is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
237 {
238 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
239
240 set_neutral_element_swizzle(¤t->FullInstruction.Src[0], 0, 0);
241 set_neutral_element_swizzle(¤t->FullInstruction.Src[1],
242 next->FullInstruction.Dst[0].Register.WriteMask,
243 op_neutral_element(current->FullInstruction.Instruction.Opcode));
244
245 current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
246 next->FullInstruction.Dst[0].Register.WriteMask;
247 return;
248 }
249 }
250
251 /*
252 * Optimize away things like:
253 * MOV TEMP[0].xyz TEMP[0].xyzx
254 * into:
255 * NOP
256 */
i915_fpc_useless_mov(union tgsi_full_token * tgsi_current)257 static boolean i915_fpc_useless_mov(union tgsi_full_token* tgsi_current)
258 {
259 union i915_full_token current;
260 copy_token(¤t , tgsi_current);
261 if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
262 current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
263 has_destination(current.FullInstruction.Instruction.Opcode) &&
264 current.FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
265 current.FullInstruction.Src[0].Register.Absolute == 0 &&
266 current.FullInstruction.Src[0].Register.Negate == 0 &&
267 is_unswizzled(¤t.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) &&
268 same_src_dst_reg(¤t.FullInstruction.Src[0], ¤t.FullInstruction.Dst[0]) )
269 {
270 return TRUE;
271 }
272 return FALSE;
273 }
274
275 /*
276 * Optimize away things like:
277 * *** TEMP[0], TEMP[1], TEMP[2]
278 * MOV OUT[0] TEMP[0]
279 * into:
280 * *** OUT[0], TEMP[1], TEMP[2]
281 */
i915_fpc_optimize_useless_mov_after_inst(union i915_full_token * current,union i915_full_token * next)282 static void i915_fpc_optimize_useless_mov_after_inst(union i915_full_token* current, union i915_full_token* next)
283 {
284 if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
285 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
286 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
287 has_destination(current->FullInstruction.Instruction.Opcode) &&
288 next->FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
289 next->FullInstruction.Src[0].Register.Absolute == 0 &&
290 next->FullInstruction.Src[0].Register.Negate == 0 &&
291 next->FullInstruction.Dst[0].Register.File == TGSI_FILE_OUTPUT &&
292 is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) &&
293 current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask &&
294 same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) )
295 {
296 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
297
298 current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0];
299 return;
300 }
301 }
302
i915_optimize(const struct tgsi_token * tokens)303 struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
304 {
305 struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
306 struct tgsi_parse_context parse;
307 int i = 0;
308
309 out_tokens->NumTokens = 0;
310
311 /* Count the tokens */
312 tgsi_parse_init( &parse, tokens );
313 while( !tgsi_parse_end_of_tokens( &parse ) ) {
314 tgsi_parse_token( &parse );
315 out_tokens->NumTokens++;
316 }
317 tgsi_parse_free (&parse);
318
319 /* Allocate our tokens */
320 out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
321
322 tgsi_parse_init( &parse, tokens );
323 while( !tgsi_parse_end_of_tokens( &parse ) ) {
324 tgsi_parse_token( &parse );
325
326 if (i915_fpc_useless_mov(&parse.FullToken)) {
327 out_tokens->NumTokens--;
328 continue;
329 }
330
331 copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
332
333 if (i > 0) {
334 i915_fpc_optimize_useless_mov_after_inst(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
335 i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
336 }
337 i++;
338 }
339 tgsi_parse_free (&parse);
340
341 return out_tokens;
342 }
343
i915_optimize_free(struct i915_token_list * tokens)344 void i915_optimize_free(struct i915_token_list* tokens)
345 {
346 free(tokens->Tokens);
347 free(tokens);
348 }
349
350
351