1 /*
2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Vadim Girlin
25  */
26 
27 #ifndef SB_SCHED_H_
28 #define SB_SCHED_H_
29 
30 namespace r600_sb {
31 
32 typedef sb_map<node*, unsigned> uc_map;
33 
34 // resource trackers for scheduler
35 // rp = read port
36 // uc = use count
37 
38 typedef sb_set<unsigned> kc_lines;
39 
40 class rp_kcache_tracker {
41 	unsigned rp[4];
42 	unsigned uc[4];
43 	const unsigned sel_count;
44 
kc_sel(sel_chan r)45 	unsigned kc_sel(sel_chan r) {
46 		return sel_count == 4 ? (unsigned)r : ((r - 1) >> 1) + 1;
47 	}
48 
49 public:
50 	rp_kcache_tracker(shader &sh);
51 
52 	bool try_reserve(node *n);
53 	void unreserve(node *n);
54 
55 
56 	bool try_reserve(sel_chan r);
57 	void unreserve(sel_chan r);
58 
59 	void reset();
60 
num_sels()61 	unsigned num_sels() { return !!rp[0] + !!rp[1] + !!rp[2] + !!rp[3]; }
62 
63 	unsigned get_lines(kc_lines &lines);
64 };
65 
66 class literal_tracker {
67 	literal lt[4];
68 	unsigned uc[4];
69 
70 public:
literal_tracker()71 	literal_tracker() : lt(), uc() {}
72 
73 	bool try_reserve(alu_node *n);
74 	void unreserve(alu_node *n);
75 
76 	bool try_reserve(literal l);
77 	void unreserve(literal l);
78 
79 	void reset();
80 
count()81 	unsigned count() { return !!uc[0] + !!uc[1] + !!uc[2] + !!uc[3]; }
82 
83 	void init_group_literals(alu_group_node *g);
84 
85 };
86 
87 class rp_gpr_tracker {
88 	// rp[cycle][elem]
89 	unsigned rp[3][4];
90 	unsigned uc[3][4];
91 
92 public:
rp_gpr_tracker()93 	rp_gpr_tracker() : rp(), uc() {}
94 
95 	bool try_reserve(alu_node *n);
96 	void unreserve(alu_node *n);
97 
98 	bool try_reserve(unsigned cycle, unsigned sel, unsigned chan);
99 	void unreserve(unsigned cycle, unsigned sel, unsigned chan);
100 
101 	void reset();
102 
103 	void dump();
104 };
105 
106 class alu_group_tracker {
107 
108 	shader &sh;
109 
110 	rp_kcache_tracker kc;
111 	rp_gpr_tracker gpr;
112 	literal_tracker lt;
113 
114 	alu_node * slots[5];
115 
116 	unsigned available_slots;
117 
118 	unsigned max_slots;
119 
120 	typedef std::map<value*, unsigned> value_index_map;
121 
122 	value_index_map vmap;
123 
124 	bool has_mova;
125 	bool uses_ar;
126 	bool has_predset;
127 	bool has_kill;
128 	bool updates_exec_mask;
129 
130 	bool consumes_lds_oqa;
131 	bool produces_lds_oqa;
132 	unsigned chan_count[4];
133 
134 	// param index + 1 (0 means that group doesn't refer to Params)
135 	// we can't use more than one param index in a group
136 	unsigned interp_param;
137 
138 	unsigned next_id;
139 
140 	node_vec packed_ops;
141 
142 	void assign_slot(unsigned slot, alu_node *n);
143 
144 public:
145 	alu_group_tracker(shader &sh);
146 
147 	// FIXME use fast bs correctness check (values for same chan <= 3) ??
148 	bool try_reserve(alu_node *n);
149 	bool try_reserve(alu_packed_node *p);
150 
151 	void reinit();
152 	void reset(bool keep_packed = false);
153 
154 	sel_chan get_value_id(value *v);
155 	void update_flags(alu_node *n);
156 
slot(unsigned i)157 	alu_node* slot(unsigned i) { return slots[i]; }
158 
used_slots()159 	unsigned used_slots() {
160 		return (~available_slots) & ((1 << max_slots) - 1);
161 	}
162 
inst_count()163 	unsigned inst_count() {
164 		return __builtin_popcount(used_slots());
165 	}
166 
literal_count()167 	unsigned literal_count() { return lt.count(); }
literal_slot_count()168 	unsigned literal_slot_count() { return (literal_count() + 1) >> 1; };
slot_count()169 	unsigned slot_count() { return inst_count() + literal_slot_count(); }
170 
get_consumes_lds_oqa()171 	bool get_consumes_lds_oqa() { return consumes_lds_oqa; }
get_produces_lds_oqa()172 	bool get_produces_lds_oqa() { return produces_lds_oqa; }
173 	alu_group_node* emit();
174 
kcache()175 	rp_kcache_tracker& kcache() { return kc; }
176 
has_update_exec_mask()177 	bool has_update_exec_mask() { return updates_exec_mask; }
avail_slots()178 	unsigned avail_slots() { return available_slots; }
179 
180 	void discard_all_slots(container_node &removed_nodes);
181 	void discard_slots(unsigned slot_mask, container_node &removed_nodes);
182 
has_ar_load()183 	bool has_ar_load() { return has_mova; }
184 };
185 
186 class alu_kcache_tracker {
187 	bc_kcache kc[4];
188 	sb_set<unsigned> lines;
189 	unsigned max_kcs;
190 
191 public:
192 
alu_kcache_tracker(sb_hw_class hc)193 	alu_kcache_tracker(sb_hw_class hc)
194 		: kc(), lines(), max_kcs(hc >= HW_CLASS_EVERGREEN ? 4 : 2) {}
195 
196 	void reset();
197 	bool try_reserve(alu_group_tracker &gt);
198 	bool update_kc();
init_clause(bc_cf & bc)199 	void init_clause(bc_cf &bc) {
200 		memcpy(bc.kc, kc, sizeof(kc));
201 	}
202 };
203 
204 class alu_clause_tracker {
205 	shader &sh;
206 
207 	alu_kcache_tracker kt;
208 	unsigned slot_count;
209 
210 	alu_group_tracker grp0;
211 	alu_group_tracker grp1;
212 
213 	unsigned group;
214 
215 	cf_node *clause;
216 
217 	bool push_exec_mask;
218 
219 	unsigned outstanding_lds_oqa_reads;
220 public:
221 	container_node conflict_nodes;
222 
223 	// current values of AR and PR registers that we have to preload
224 	// till the end of clause (in fact, beginning, because we're scheduling
225 	// bottom-up)
226 	value *current_ar;
227 	value *current_pr;
228 	// current values of CF_IDX registers that need preloading
229 	value *current_idx[2];
230 
231 	alu_clause_tracker(shader &sh);
232 
233 	void reset();
234 
235 	// current group
grp()236 	alu_group_tracker& grp() { return group ? grp1 : grp0; }
237 	// previous group
prev_grp()238 	alu_group_tracker& prev_grp() { return group ? grp0 : grp1; }
239 
240 	void emit_group();
241 	void emit_clause(container_node *c);
242 	bool check_clause_limits();
243 	void new_group();
244 	bool is_empty();
245 
246 	alu_node* create_ar_load(value *v, chan_select ar_channel);
247 
248 	void discard_current_group();
249 
total_slots()250 	unsigned total_slots() { return slot_count; }
251 };
252 
253 class post_scheduler : public pass {
254 
255 	container_node ready, ready_copies; // alu only
256 	container_node pending, bb_pending;
257 	bb_node *cur_bb;
258 	val_set live; // values live at the end of the alu clause
259 	uc_map ucm;
260 	alu_clause_tracker alu;
261 
262 	typedef std::map<sel_chan, value*> rv_map;
263 	rv_map regmap, prev_regmap;
264 
265 	val_set cleared_interf;
266 
267 	void emit_index_registers();
268 public:
269 
post_scheduler(shader & sh)270 	post_scheduler(shader &sh) : pass(sh),
271 		ready(), ready_copies(), pending(), cur_bb(),
272 		live(), ucm(), alu(sh),	regmap(), cleared_interf() {}
273 
274 	virtual int run();
275 	bool run_on(container_node *n);
276 	bool schedule_bb(bb_node *bb);
277 
278 	void load_index_register(value *v, unsigned idx);
279 	void process_fetch(container_node *c);
280 
281 	bool process_alu(container_node *c);
282 	bool schedule_alu(container_node *c);
283 	bool prepare_alu_group();
284 
285 	void release_op(node *n);
286 
287 	void release_src_values(node *n);
288 	void release_src_vec(vvec &vv, bool src);
289 	void release_src_val(value *v);
290 
291 	void init_uc_val(container_node *c, value *v);
292 	void init_uc_vec(container_node *c, vvec &vv, bool src);
293 	unsigned init_ucm(container_node *c, node *n);
294 
295 	void init_regmap();
296 
297 	bool check_interferences();
298 
299 	unsigned try_add_instruction(node *n);
300 
301 	bool check_copy(node *n);
302 	void dump_group(alu_group_tracker &rt);
303 
304 	bool unmap_dst(alu_node *n);
305 	bool unmap_dst_val(value *d);
306 
307 	bool map_src(alu_node *n);
308 	bool map_src_vec(vvec &vv, bool src);
309 	bool map_src_val(value *v);
310 
311 	bool recolor_local(value *v);
312 
313 	void update_local_interferences();
314 	void update_live_src_vec(vvec &vv, val_set *born, bool src);
315 	void update_live_dst_vec(vvec &vv);
316 	void update_live(node *n, val_set *born);
317 	void process_group();
318 
319 	void set_color_local_val(value *v, sel_chan color);
320 	void set_color_local(value *v, sel_chan color);
321 
322 	void add_interferences(value *v, sb_bitset &rb, val_set &vs);
323 
324 	void init_globals(val_set &s, bool prealloc);
325 
326 	void recolor_locals();
327 
328 	void dump_regmap();
329 
330 	void emit_load_ar();
331 	void emit_clause();
332 
333 	void process_ready_copies();
334 };
335 
336 } // namespace r600_sb
337 
338 #endif /* SB_SCHED_H_ */
339