1 /*
2  * Copyright (C) 2020 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors (Collabora):
24  *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  */
26 
27 #include "compiler.h"
28 
29 /* Does an instruction respect outmods and source mods? Depend
30  * on the types involved */
31 
32 bool
bi_has_outmod(bi_instruction * ins)33 bi_has_outmod(bi_instruction *ins)
34 {
35         bool classy = bi_class_props[ins->type] & BI_MODS;
36         bool floaty = nir_alu_type_get_base_type(ins->dest_type) == nir_type_float;
37 
38         return classy && floaty;
39 }
40 
41 /* Have to check source for e.g. compares */
42 
43 bool
bi_has_source_mods(bi_instruction * ins)44 bi_has_source_mods(bi_instruction *ins)
45 {
46         bool classy = bi_class_props[ins->type] & BI_MODS;
47         bool floaty = nir_alu_type_get_base_type(ins->src_types[0]) == nir_type_float;
48 
49         return classy && floaty;
50 }
51 
52 /* A source is swizzled if the op is swizzlable, in 8-bit or
53  * 16-bit mode, and the swizzled op. TODO: multi args */
54 
55 bool
bi_is_src_swizzled(bi_instruction * ins,unsigned s)56 bi_is_src_swizzled(bi_instruction *ins, unsigned s)
57 {
58         bool classy = bi_class_props[ins->type] & BI_SWIZZLABLE;
59         bool small = nir_alu_type_get_type_size(ins->dest_type) < 32;
60         bool first = (s == 0); /* TODO: prop? */
61 
62         return classy && small && first;
63 }
64 
65 bool
bi_has_arg(bi_instruction * ins,unsigned arg)66 bi_has_arg(bi_instruction *ins, unsigned arg)
67 {
68         if (!ins)
69                 return false;
70 
71         bi_foreach_src(ins, s) {
72                 if (ins->src[s] == arg)
73                         return true;
74         }
75 
76         return false;
77 }
78 
/* Collapses a 16-bit bytemask into a mask with one bit per component, where
 * each component spans `bytes` consecutive bytes: bit d of the result mirrors
 * the first byte of component d. All bytes belonging to one component are
 * required to agree; this is asserted. `bytes` must be nonzero. */

uint16_t
bi_from_bytemask(uint16_t bytemask, unsigned bytes)
{
        unsigned value = 0;

        /* A zero stride would never advance the loop below */
        assert(bytes != 0);

        for (unsigned c = 0, d = 0; c < 16; c += bytes, ++d) {
                bool a = (bytemask & (1u << c)) != 0;

                /* Check the whole component [c, c + bytes), not just the
                 * first one, clamped to the 16 bits the mask actually has */
                unsigned end = (c + bytes < 16) ? (c + bytes) : 16;

                for (unsigned q = c; q < end; ++q)
                        assert(((bytemask & (1u << q)) != 0) == a);

                value |= ((unsigned) a << d);
        }

        return value;
}
95 
96 unsigned
bi_get_component_count(bi_instruction * ins,signed src)97 bi_get_component_count(bi_instruction *ins, signed src)
98 {
99         /* Discards and branches are oddball since they're not BI_VECTOR but no
100          * destination. So special case.. */
101         if (ins->type == BI_DISCARD || ins->type == BI_BRANCH)
102                 return 1;
103 
104         if (bi_class_props[ins->type] & BI_VECTOR) {
105                 assert(ins->vector_channels);
106                 return (src <= 0) ? ins->vector_channels : 1;
107         } else {
108                 unsigned dest_bytes = nir_alu_type_get_type_size(ins->dest_type);
109                 unsigned src_bytes = nir_alu_type_get_type_size(ins->src_types[MAX2(src, 0)]);
110 
111                 /* If there's either f32 on either end, it's only a single
112                  * component, etc. */
113 
114                 unsigned bytes = src < 0 ? dest_bytes : src_bytes;
115 
116                 if (ins->type == BI_CONVERT)
117                         bytes = MAX2(dest_bytes, src_bytes);
118 
119                 if (ins->type == BI_ATEST || ins->type == BI_SELECT)
120                         return 1;
121 
122                 return MAX2(32 / bytes, 1);
123         }
124 }
125 
126 uint16_t
bi_bytemask_of_read_components(bi_instruction * ins,unsigned node)127 bi_bytemask_of_read_components(bi_instruction *ins, unsigned node)
128 {
129         uint16_t mask = 0x0;
130 
131         bi_foreach_src(ins, s) {
132                 if (ins->src[s] != node) continue;
133                 unsigned component_count = bi_get_component_count(ins, s);
134                 nir_alu_type T = ins->src_types[s];
135                 unsigned size = nir_alu_type_get_type_size(T);
136                 unsigned bytes = size / 8;
137                 unsigned cmask = (1 << bytes) - 1;
138 
139                 for (unsigned i = 0; i < component_count; ++i) {
140                         unsigned c = ins->swizzle[s][i];
141                         mask |= (cmask << (c * bytes));
142                 }
143         }
144 
145         return mask;
146 }
147 
148 uint64_t
bi_get_immediate(bi_instruction * ins,unsigned index)149 bi_get_immediate(bi_instruction *ins, unsigned index)
150 {
151         unsigned v = ins->src[index];
152         assert(v & BIR_INDEX_CONSTANT);
153         unsigned shift = v & ~BIR_INDEX_CONSTANT;
154         uint64_t shifted = ins->constant.u64 >> shift;
155 
156         /* Mask off the accessed part */
157         unsigned sz = nir_alu_type_get_type_size(ins->src_types[index]);
158 
159         if (sz == 64)
160                 return shifted;
161         else
162                 return shifted & ((1ull << sz) - 1);
163 }
164 
165 bool
bi_writes_component(bi_instruction * ins,unsigned comp)166 bi_writes_component(bi_instruction *ins, unsigned comp)
167 {
168         return comp < bi_get_component_count(ins, -1);
169 }
170 
171 /* Determine effective writemask for RA/DCE, noting that we currently act
172  * per-register hence aligning. TODO: when real write masks are handled in
173  * packing (not for a while), update this routine, removing the align */
174 
175 unsigned
bi_writemask(bi_instruction * ins)176 bi_writemask(bi_instruction *ins)
177 {
178         nir_alu_type T = ins->dest_type;
179         unsigned size = nir_alu_type_get_type_size(T);
180         unsigned bytes_per_comp = size / 8;
181         unsigned components = bi_get_component_count(ins, -1);
182         unsigned bytes = ALIGN_POT(bytes_per_comp * components, 4);
183         unsigned mask = (1 << bytes) - 1;
184         unsigned shift = ins->dest_offset * 4; /* 32-bit words */
185         return (mask << shift);
186 }
187 
188 /* Rewrites uses of an index. This is O(nc) to the program and number of
189  * uses, so combine lowering is effectively O(n^2).  Better bookkeeping
190  * would bring down to linear if that's an issue. */
191 
192 void
bi_rewrite_uses(bi_context * ctx,unsigned old,unsigned oldc,unsigned new,unsigned newc)193 bi_rewrite_uses(bi_context *ctx,
194                 unsigned old, unsigned oldc,
195                 unsigned new, unsigned newc)
196 {
197         assert(newc >= oldc);
198 
199         bi_foreach_instr_global(ctx, ins) {
200                 bi_foreach_src(ins, s) {
201                         if (ins->src[s] != old) continue;
202 
203                         for (unsigned i = 0; i < 16; ++i)
204                                 ins->swizzle[s][i] += (newc - oldc);
205 
206                         ins->src[s] = new;
207                 }
208         }
209 }
210 
211 
212