1 /**************************************************************************
2  *
3  * Copyright 2011 The Chromium OS authors.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 #include "i915_reg.h"
29 #include "i915_context.h"
30 #include "i915_fpc.h"
31 
32 #include "pipe/p_shader_tokens.h"
33 #include "util/u_math.h"
34 #include "util/u_memory.h"
35 #include "util/u_string.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "tgsi/tgsi_dump.h"
38 
same_src_dst_reg(struct i915_full_src_register * s1,struct i915_full_dst_register * d1)39 static boolean same_src_dst_reg(struct i915_full_src_register* s1, struct i915_full_dst_register* d1)
40 {
41    return (s1->Register.File == d1->Register.File &&
42            s1->Register.Indirect == d1->Register.Indirect &&
43            s1->Register.Dimension == d1->Register.Dimension &&
44            s1->Register.Index == d1->Register.Index);
45 }
46 
same_dst_reg(struct i915_full_dst_register * d1,struct i915_full_dst_register * d2)47 static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2)
48 {
49    return (d1->Register.File == d2->Register.File &&
50            d1->Register.Indirect == d2->Register.Indirect &&
51            d1->Register.Dimension == d2->Register.Dimension &&
52            d1->Register.Index == d2->Register.Index);
53 }
54 
same_src_reg(struct i915_full_src_register * d1,struct i915_full_src_register * d2)55 static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2)
56 {
57    return (d1->Register.File == d2->Register.File &&
58            d1->Register.Indirect == d2->Register.Indirect &&
59            d1->Register.Dimension == d2->Register.Dimension &&
60            d1->Register.Index == d2->Register.Index &&
61            d1->Register.Absolute == d2->Register.Absolute &&
62            d1->Register.Negate == d2->Register.Negate);
63 }
64 
has_destination(unsigned opcode)65 static boolean has_destination(unsigned opcode)
66 {
67    return (opcode != TGSI_OPCODE_NOP &&
68            opcode != TGSI_OPCODE_KIL &&
69            opcode != TGSI_OPCODE_KILP &&
70            opcode != TGSI_OPCODE_END &&
71            opcode != TGSI_OPCODE_RET);
72 }
73 
is_unswizzled(struct i915_full_src_register * r,unsigned write_mask)74 static boolean is_unswizzled(struct i915_full_src_register* r,
75                              unsigned write_mask)
76 {
77    if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
78       return FALSE;
79    if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
80       return FALSE;
81    if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
82       return FALSE;
83    if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W)
84       return FALSE;
85    return TRUE;
86 }
87 
op_commutes(unsigned opcode)88 static boolean op_commutes(unsigned opcode)
89 {
90    switch(opcode)
91    {
92       case TGSI_OPCODE_ADD:
93       case TGSI_OPCODE_MUL:
94       case TGSI_OPCODE_DP2:
95       case TGSI_OPCODE_DP3:
96       case TGSI_OPCODE_DP4:
97          return TRUE;
98    }
99    return FALSE;
100 }
101 
op_neutral_element(unsigned opcode)102 static unsigned op_neutral_element(unsigned opcode)
103 {
104    switch(opcode)
105    {
106       case TGSI_OPCODE_ADD:
107          return TGSI_SWIZZLE_ZERO;
108       case TGSI_OPCODE_MUL:
109       case TGSI_OPCODE_DP2:
110       case TGSI_OPCODE_DP3:
111       case TGSI_OPCODE_DP4:
112          return TGSI_SWIZZLE_ONE;
113    }
114 
115    debug_printf("Unknown opcode %d\n",opcode);
116    return TGSI_SWIZZLE_ZERO;
117 }
118 
119 /*
120  * Sets the swizzle to the neutral element for the operation for the bits
121  * of writemask which are set, swizzle to identity otherwise.
122  */
set_neutral_element_swizzle(struct i915_full_src_register * r,unsigned write_mask,unsigned neutral)123 static void set_neutral_element_swizzle(struct i915_full_src_register* r,
124                                         unsigned write_mask,
125                                         unsigned neutral)
126 {
127    if ( write_mask & TGSI_WRITEMASK_X )
128       r->Register.SwizzleX = neutral;
129    else
130       r->Register.SwizzleX = TGSI_SWIZZLE_X;
131 
132    if ( write_mask & TGSI_WRITEMASK_Y )
133       r->Register.SwizzleY = neutral;
134    else
135       r->Register.SwizzleY = TGSI_SWIZZLE_Y;
136 
137    if ( write_mask & TGSI_WRITEMASK_Z )
138       r->Register.SwizzleZ = neutral;
139    else
140       r->Register.SwizzleZ = TGSI_SWIZZLE_Z;
141 
142    if ( write_mask & TGSI_WRITEMASK_W )
143       r->Register.SwizzleW = neutral;
144    else
145       r->Register.SwizzleW = TGSI_SWIZZLE_W;
146 }
147 
copy_src_reg(struct i915_src_register * o,const struct tgsi_src_register * i)148 static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i)
149 {
150    o->File      = i->File;
151    o->Indirect  = i->Indirect;
152    o->Dimension = i->Dimension;
153    o->Index     = i->Index;
154    o->SwizzleX  = i->SwizzleX;
155    o->SwizzleY  = i->SwizzleY;
156    o->SwizzleZ  = i->SwizzleZ;
157    o->SwizzleW  = i->SwizzleW;
158    o->Absolute  = i->Absolute;
159    o->Negate    = i->Negate;
160 }
161 
copy_dst_reg(struct i915_dst_register * o,const struct tgsi_dst_register * i)162 static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i)
163 {
164    o->File      = i->File;
165    o->WriteMask = i->WriteMask;
166    o->Indirect  = i->Indirect;
167    o->Dimension = i->Dimension;
168    o->Index     = i->Index;
169 }
170 
copy_instruction(struct i915_full_instruction * o,const struct tgsi_full_instruction * i)171 static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i)
172 {
173    memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
174    memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
175 
176    copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
177 
178    copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
179    copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
180    copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
181 }
182 
copy_token(union i915_full_token * o,union tgsi_full_token * i)183 static void copy_token(union i915_full_token* o, union tgsi_full_token* i)
184 {
185    if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
186       memcpy(o, i, sizeof(*o));
187    else
188       copy_instruction(&o->FullInstruction, &i->FullInstruction);
189 
190 }
191 
192 /*
193  * Optimize away things like:
194  *    MUL OUT[0].xyz, TEMP[1], TEMP[2]
195  *    MOV OUT[0].w, TEMP[2]
196  * into:
197  *    MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
198  * This is useful for optimizing texenv.
199  */
i915_fpc_optimize_mov_after_alu(union i915_full_token * current,union i915_full_token * next)200 static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, union i915_full_token* next)
201 {
202    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
203         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
204         op_commutes(current->FullInstruction.Instruction.Opcode) &&
205         current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
206         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
207         same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
208         same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) &&
209         !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
210         is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
211         is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
212         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
213    {
214       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
215 
216       set_neutral_element_swizzle(&current->FullInstruction.Src[1], 0, 0);
217       set_neutral_element_swizzle(&current->FullInstruction.Src[0],
218                                   next->FullInstruction.Dst[0].Register.WriteMask,
219                                   op_neutral_element(current->FullInstruction.Instruction.Opcode));
220 
221       current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
222                                                            next->FullInstruction.Dst[0].Register.WriteMask;
223       return;
224    }
225 
226    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
227         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
228         op_commutes(current->FullInstruction.Instruction.Opcode) &&
229         current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
230         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
231         same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
232         same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
233         !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
234         is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
235         is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
236         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
237    {
238       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
239 
240       set_neutral_element_swizzle(&current->FullInstruction.Src[0], 0, 0);
241       set_neutral_element_swizzle(&current->FullInstruction.Src[1],
242                                   next->FullInstruction.Dst[0].Register.WriteMask,
243                                   op_neutral_element(current->FullInstruction.Instruction.Opcode));
244 
245       current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
246                                                            next->FullInstruction.Dst[0].Register.WriteMask;
247       return;
248    }
249 }
250 
251 /*
252  * Optimize away things like:
253  *    MOV TEMP[0].xyz TEMP[0].xyzx
254  * into:
255  *    NOP
256  */
i915_fpc_useless_mov(union tgsi_full_token * tgsi_current)257 static boolean i915_fpc_useless_mov(union tgsi_full_token* tgsi_current)
258 {
259    union i915_full_token current;
260    copy_token(&current , tgsi_current);
261    if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
262         current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
263         has_destination(current.FullInstruction.Instruction.Opcode) &&
264         current.FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
265         current.FullInstruction.Src[0].Register.Absolute == 0 &&
266         current.FullInstruction.Src[0].Register.Negate == 0 &&
267         is_unswizzled(&current.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) &&
268         same_src_dst_reg(&current.FullInstruction.Src[0], &current.FullInstruction.Dst[0]) )
269    {
270       return TRUE;
271    }
272    return FALSE;
273 }
274 
275 /*
276  * Optimize away things like:
277  *    *** TEMP[0], TEMP[1], TEMP[2]
278  *    MOV OUT[0] TEMP[0]
279  * into:
280  *    *** OUT[0], TEMP[1], TEMP[2]
281  */
i915_fpc_optimize_useless_mov_after_inst(union i915_full_token * current,union i915_full_token * next)282 static void i915_fpc_optimize_useless_mov_after_inst(union i915_full_token* current, union i915_full_token* next)
283 {
284    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
285         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
286         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
287         has_destination(current->FullInstruction.Instruction.Opcode) &&
288         next->FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
289         next->FullInstruction.Src[0].Register.Absolute == 0 &&
290         next->FullInstruction.Src[0].Register.Negate == 0 &&
291         next->FullInstruction.Dst[0].Register.File == TGSI_FILE_OUTPUT &&
292         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) &&
293         current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask &&
294         same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) )
295    {
296       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
297 
298       current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0];
299       return;
300    }
301 }
302 
i915_optimize(const struct tgsi_token * tokens)303 struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
304 {
305    struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
306    struct tgsi_parse_context parse;
307    int i = 0;
308 
309    out_tokens->NumTokens = 0;
310 
311    /* Count the tokens */
312    tgsi_parse_init( &parse, tokens );
313    while( !tgsi_parse_end_of_tokens( &parse ) ) {
314       tgsi_parse_token( &parse );
315       out_tokens->NumTokens++;
316    }
317    tgsi_parse_free (&parse);
318 
319    /* Allocate our tokens */
320    out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
321 
322    tgsi_parse_init( &parse, tokens );
323    while( !tgsi_parse_end_of_tokens( &parse ) ) {
324       tgsi_parse_token( &parse );
325 
326       if (i915_fpc_useless_mov(&parse.FullToken)) {
327          out_tokens->NumTokens--;
328          continue;
329       }
330 
331       copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
332 
333       if (i > 0) {
334          i915_fpc_optimize_useless_mov_after_inst(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
335          i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
336       }
337       i++;
338    }
339    tgsi_parse_free (&parse);
340 
341    return out_tokens;
342 }
343 
i915_optimize_free(struct i915_token_list * tokens)344 void i915_optimize_free(struct i915_token_list* tokens)
345 {
346    free(tokens->Tokens);
347    free(tokens);
348 }
349 
350 
351