1 /*
2  * Copyright (C) 2020 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors (Collabora):
24  *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  */
26 
27 #include "compiler.h"
28 
/* Bifrost requires special functions to be lowered in various machine-specific
 * ways. The routines in this file are used in codegen to emit those lowerings. */
31 
32 /* New Bifrost has a FEXP2_FAST instruction but requires an auxiliary
33  * parameter. */
34 
35 static void
bi_emit_fexp2_new(bi_context * ctx,nir_alu_instr * instr)36 bi_emit_fexp2_new(bi_context *ctx, nir_alu_instr *instr)
37 {
38         /* FMA_MSCALE T, X, 1.0, 0, 0x18 */
39 
40         bi_instruction mscale = {
41                 .type = BI_FMA,
42                 .op = { .mscale = true },
43                 .dest = bi_make_temp(ctx),
44                 .dest_type = nir_type_float32,
45                 .src = {
46                         pan_src_index(&instr->src[0].src),
47                         BIR_INDEX_CONSTANT | 0,
48                         BIR_INDEX_ZERO,
49                         BIR_INDEX_CONSTANT | 32,
50                 },
51                 .src_types = {
52                         nir_type_float32,
53                         nir_type_float32,
54                         nir_type_float32,
55                         nir_type_int32,
56                 },
57                 .constant = {
58                         /* 0x3f80000000 = 1.0f as fp32
59                          * 24 = shift to multiply by 2^24 */
60                         .u64 = (0x3f800000) | (24ull << 32)
61                 },
62                 .swizzle = { { instr->src[0].swizzle[0] } }
63         };
64 
65         /* F2I_RTE T, T */
66 
67         bi_instruction f2i = {
68                 .type = BI_CONVERT,
69                 .dest = bi_make_temp(ctx),
70                 .dest_type = nir_type_int32,
71                 .src = { mscale.dest },
72                 .src_types = { nir_type_float32 },
73                 .roundmode = BIFROST_RTE
74         };
75 
76         /* FEXP2_FAST T, T, X */
77 
78         bi_instruction fexp = {
79                 .type = BI_SPECIAL_ADD,
80                 .op = { .special = BI_SPECIAL_EXP2_LOW },
81                 .dest = pan_dest_index(&instr->dest.dest),
82                 .dest_type = nir_type_float32,
83                 .src = { f2i.dest, mscale.src[0] },
84                 .src_types = { nir_type_int32, nir_type_float32 },
85                 .swizzle = { {}, { instr->src[0].swizzle[0] } }
86         };
87 
88         bi_emit(ctx, mscale);
89         bi_emit(ctx, f2i);
90         bi_emit(ctx, fexp);
91 }
92 
93 /* Even on new Bifrost, there are a bunch of reductions to do */
94 
95 static void
bi_emit_flog2_new(bi_context * ctx,nir_alu_instr * instr)96 bi_emit_flog2_new(bi_context *ctx, nir_alu_instr *instr)
97 {
98         /* LOG_FREXPE X */
99         bi_instruction frexpe = {
100                 .type = BI_FREXP,
101                 .op = { .frexp = BI_FREXPE_LOG },
102                 .dest = bi_make_temp(ctx),
103                 .dest_type = nir_type_int32,
104                 .src = { pan_src_index(&instr->src[0].src) },
105                 .src_types = { nir_type_float32 },
106                 .swizzle = { { instr->src[0].swizzle[0] } }
107         };
108 
109         /* I32_TO_F32 m */
110         bi_instruction i2f = {
111                 .type = BI_CONVERT,
112                 .dest = bi_make_temp(ctx),
113                 .dest_type = nir_type_float32,
114                 .src = { frexpe.dest },
115                 .src_types = { nir_type_int32 },
116                 .roundmode = BIFROST_RTZ
117         };
118 
119         /* ADD_FREXPM (x-1), -1.0, X */
120         bi_instruction x_minus_1 = {
121                 .type = BI_REDUCE_FMA,
122                 .op = { .reduce = BI_REDUCE_ADD_FREXPM },
123                 .dest = bi_make_temp(ctx),
124                 .dest_type = nir_type_float32,
125                 .src = {
126                         BIR_INDEX_CONSTANT,
127                         pan_src_index(&instr->src[0].src),
128                 },
129                 .src_types = { nir_type_float32, nir_type_float32 },
130                 .constant = {
131                         .u64 = 0xBF800000 /* -1.0 */
132                 },
133                 .swizzle = { {}, { instr->src[0].swizzle[0] } }
134         };
135 
136         /* FLOG2_HELP log2(x)/(x-1), x */
137         bi_instruction help = {
138                 .type = BI_TABLE,
139                 .op = { .table = BI_TABLE_LOG2_U_OVER_U_1_LOW },
140                 .dest = bi_make_temp(ctx),
141                 .dest_type = nir_type_float32,
142                 .src = { pan_src_index(&instr->src[0].src) },
143                 .src_types = { nir_type_float32 },
144                 .swizzle = { { instr->src[0].swizzle[0] } }
145         };
146 
147         /* FMA log2(x)/(x - 1), (x - 1), M */
148         bi_instruction fma = {
149                 .type = BI_FMA,
150                 .dest = pan_dest_index(&instr->dest.dest),
151                 .dest_type = nir_type_float32,
152                 .src = {
153                         help.dest,
154                         x_minus_1.dest,
155                         i2f.dest
156                 },
157                 .src_types = {
158                         nir_type_float32,
159                         nir_type_float32,
160                         nir_type_float32
161                 }
162         };
163 
164         bi_emit(ctx, frexpe);
165         bi_emit(ctx, i2f);
166         bi_emit(ctx, x_minus_1);
167         bi_emit(ctx, help);
168         bi_emit(ctx, fma);
169 }
170 
171 void
bi_emit_fexp2(bi_context * ctx,nir_alu_instr * instr)172 bi_emit_fexp2(bi_context *ctx, nir_alu_instr *instr)
173 {
174         /* TODO: G71 */
175         bi_emit_fexp2_new(ctx, instr);
176 }
177 
178 void
bi_emit_flog2(bi_context * ctx,nir_alu_instr * instr)179 bi_emit_flog2(bi_context *ctx, nir_alu_instr *instr)
180 {
181         /* TODO: G71 */
182         bi_emit_flog2_new(ctx, instr);
183 }
184