1 /*
2  * Copyright © 2016 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /**
25  * @file vc4_opt_peephole_sf.c
26  *
27  * Quick optimization to eliminate unused or identical SF updates.
28  */
29 
30 #include "vc4_qir.h"
31 #include "util/u_math.h"
32 
/* Gates the stderr tracing done by dump_from()/dump_to().  Nothing visible
 * in this file assigns it, so tracing stays off unless this initializer is
 * changed (or the variable is poked from a debugger).
 */
static bool debug = false;
34 
35 static void
dump_from(struct vc4_compile * c,struct qinst * inst,const char * type)36 dump_from(struct vc4_compile *c, struct qinst *inst, const char *type)
37 {
38         if (!debug)
39                 return;
40 
41         fprintf(stderr, "optimizing %s: ", type);
42         qir_dump_inst(c, inst);
43         fprintf(stderr, "\n");
44 }
45 
46 static void
dump_to(struct vc4_compile * c,struct qinst * inst)47 dump_to(struct vc4_compile *c, struct qinst *inst)
48 {
49         if (!debug)
50                 return;
51 
52         fprintf(stderr, "to: ");
53         qir_dump_inst(c, inst);
54         fprintf(stderr, "\n");
55 }
56 
57 static bool
inst_srcs_updated(struct qinst * inst,struct qinst * writer)58 inst_srcs_updated(struct qinst *inst, struct qinst *writer)
59 {
60         /* If the sources get overwritten, stop tracking the
61          * last instruction writing SF.
62          */
63         switch (writer->dst.file) {
64         case QFILE_TEMP:
65                 for (int i = 0; i < qir_get_nsrc(inst); i++) {
66                         if (inst->src[i].file == QFILE_TEMP &&
67                             inst->src[i].index == writer->dst.index) {
68                                 return true;
69                         }
70                 }
71                 return false;
72         default:
73                 return false;
74         }
75 }
76 
77 static bool
src_file_varies_on_reread(struct qreg reg)78 src_file_varies_on_reread(struct qreg reg)
79 {
80         switch (reg.file) {
81         case QFILE_VARY:
82         case QFILE_VPM:
83                 return true;
84         default:
85                 return false;
86         }
87 }
88 
89 static bool
inst_result_equals(struct qinst * a,struct qinst * b)90 inst_result_equals(struct qinst *a, struct qinst *b)
91 {
92         if (a->op != b->op ||
93             qir_depends_on_flags(a) ||
94             qir_depends_on_flags(b)) {
95                 return false;
96         }
97 
98         for (int i = 0; i < qir_get_nsrc(a); i++) {
99                 if (!qir_reg_equals(a->src[i], b->src[i]) ||
100                     src_file_varies_on_reread(a->src[i]) ||
101                     src_file_varies_on_reread(b->src[i])) {
102                         return false;
103                 }
104         }
105 
106         return true;
107 }
108 
109 static bool
qir_opt_peephole_sf_block(struct vc4_compile * c,struct qblock * block)110 qir_opt_peephole_sf_block(struct vc4_compile *c, struct qblock *block)
111 {
112         bool progress = false;
113         /* We don't have liveness dataflow analysis for flags, but we also
114          * never generate a use of flags across control flow, so just treat
115          * them as unused at block exit.
116          */
117         bool sf_live = false;
118         struct qinst *last_sf = NULL;
119 
120         /* Walk the block from bottom to top, tracking if the SF is used, and
121          * removing unused or repeated ones.
122          */
123         qir_for_each_inst_rev(inst, block) {
124                 if (inst->sf) {
125                         if (!sf_live) {
126                                 /* Our instruction's SF isn't read, so drop it.
127                                  */
128                                 dump_from(c, inst, "dead SF");
129                                 inst->sf = false;
130                                 dump_to(c, inst);
131                                 progress = true;
132                         } else if (last_sf &&
133                                    inst_result_equals(last_sf, inst)) {
134                                 /* The last_sf sets up same value as inst, so
135                                  * just drop the later one.
136                                  */
137                                 dump_from(c, last_sf, "repeated SF");
138                                 last_sf->sf = false;
139                                 dump_to(c, last_sf);
140                                 progress = true;
141                                 last_sf = inst;
142                         } else {
143                                 last_sf = inst;
144                         }
145                         sf_live = false;
146                 }
147 
148                 if (last_sf) {
149                         if (inst_srcs_updated(last_sf, inst))
150                                 last_sf = NULL;
151                 }
152 
153                 if (qir_depends_on_flags(inst))
154                         sf_live = true;
155         }
156 
157         return progress;
158 }
159 
160 bool
qir_opt_peephole_sf(struct vc4_compile * c)161 qir_opt_peephole_sf(struct vc4_compile *c)
162 {
163         bool progress = false;
164 
165         qir_for_each_block(block, c)
166                 progress = qir_opt_peephole_sf_block(c, block) || progress;
167 
168         return progress;
169 }
170