1 /*
2  * Copyright © 2016 Red Hat
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <stdbool.h>
25 
26 #include "st_tgsi_lower_yuv.h"
27 #include "tgsi/tgsi_transform.h"
28 #include "tgsi/tgsi_scan.h"
29 #include "tgsi/tgsi_dump.h"
30 #include "util/u_debug.h"
31 
32 #include "util/bitscan.h"
33 
34 struct tgsi_yuv_transform {
35    struct tgsi_transform_context base;
36    struct tgsi_shader_info info;
37    struct tgsi_full_src_register imm[4];
38    struct {
39       struct tgsi_full_src_register src;
40       struct tgsi_full_dst_register dst;
41    } tmp[2];
42 #define A 0
43 #define B 1
44 
45    /* Maps a primary sampler (used for Y) to the U or UV sampler.  In
46     * case of 3-plane YUV format, the V plane is next sampler after U.
47     */
48    unsigned char sampler_map[PIPE_MAX_SAMPLERS][2];
49 
50    bool first_instruction_emitted;
51    unsigned free_slots;
52    unsigned lower_nv12;
53    unsigned lower_iyuv;
54 };
55 
/* Downcast the generic transform context to the YUV-lowering context.
 * Relies on 'base' being the first member of struct tgsi_yuv_transform.
 */
static inline struct tgsi_yuv_transform *
tgsi_yuv_transform(struct tgsi_transform_context *tctx)
{
   struct tgsi_yuv_transform *yuv = (struct tgsi_yuv_transform *)tctx;

   return yuv;
}
61 
62 static void
reg_dst(struct tgsi_full_dst_register * dst,const struct tgsi_full_dst_register * orig_dst,unsigned wrmask)63 reg_dst(struct tgsi_full_dst_register *dst,
64         const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
65 {
66    *dst = *orig_dst;
67    dst->Register.WriteMask &= wrmask;
68    assert(dst->Register.WriteMask);
69 }
70 
71 static inline void
get_swiz(unsigned * swiz,const struct tgsi_src_register * src)72 get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
73 {
74    swiz[0] = src->SwizzleX;
75    swiz[1] = src->SwizzleY;
76    swiz[2] = src->SwizzleZ;
77    swiz[3] = src->SwizzleW;
78 }
79 
80 static void
reg_src(struct tgsi_full_src_register * src,const struct tgsi_full_src_register * orig_src,unsigned sx,unsigned sy,unsigned sz,unsigned sw)81 reg_src(struct tgsi_full_src_register *src,
82         const struct tgsi_full_src_register *orig_src,
83         unsigned sx, unsigned sy, unsigned sz, unsigned sw)
84 {
85    unsigned swiz[4];
86    get_swiz(swiz, &orig_src->Register);
87    *src = *orig_src;
88    src->Register.SwizzleX = swiz[sx];
89    src->Register.SwizzleY = swiz[sy];
90    src->Register.SwizzleZ = swiz[sz];
91    src->Register.SwizzleW = swiz[sw];
92 }
93 
/* Placeholder selector for channels whose value is never consumed; it
 * simply aliases X.  SWIZ() reaches it when given '_' as an argument.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X

/* Expands to four comma-separated TGSI_SWIZZLE_* constants, ready to be
 * passed as the sx, sy, sz, sw arguments of reg_src().
 */
#define SWIZ(x,y,z,w) \
      TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \
      TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
97 
98 static inline struct tgsi_full_instruction
tex_instruction(unsigned samp)99 tex_instruction(unsigned samp)
100 {
101    struct tgsi_full_instruction inst;
102 
103    inst = tgsi_default_full_instruction();
104    inst.Instruction.Opcode = TGSI_OPCODE_TEX;
105    inst.Instruction.Texture = 1;
106    inst.Texture.Texture = TGSI_TEXTURE_2D;
107    inst.Instruction.NumDstRegs = 1;
108    inst.Instruction.NumSrcRegs = 2;
109    inst.Src[1].Register.File  = TGSI_FILE_SAMPLER;
110    inst.Src[1].Register.Index = samp;
111 
112    return inst;
113 }
114 
115 static inline struct tgsi_full_instruction
mov_instruction(void)116 mov_instruction(void)
117 {
118    struct tgsi_full_instruction inst;
119 
120    inst = tgsi_default_full_instruction();
121    inst.Instruction.Opcode = TGSI_OPCODE_MOV;
122    inst.Instruction.Saturate = 0;
123    inst.Instruction.NumDstRegs = 1;
124    inst.Instruction.NumSrcRegs = 1;
125 
126    return inst;
127 }
128 
129 static inline struct tgsi_full_instruction
dp3_instruction(void)130 dp3_instruction(void)
131 {
132    struct tgsi_full_instruction inst;
133 
134    inst = tgsi_default_full_instruction();
135    inst.Instruction.Opcode = TGSI_OPCODE_DP3;
136    inst.Instruction.NumDstRegs = 1;
137    inst.Instruction.NumSrcRegs = 2;
138 
139    return inst;
140 }
141 
142 
143 
144 static void
emit_immed(struct tgsi_transform_context * tctx,int idx,float x,float y,float z,float w)145 emit_immed(struct tgsi_transform_context *tctx, int idx,
146            float x, float y, float z, float w)
147 {
148    struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
149    struct tgsi_shader_info *info = &ctx->info;
150    struct tgsi_full_immediate immed;
151 
152    immed = tgsi_default_full_immediate();
153    immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
154    immed.u[0].Float = x;
155    immed.u[1].Float = y;
156    immed.u[2].Float = z;
157    immed.u[3].Float = w;
158    tctx->emit_immediate(tctx, &immed);
159 
160    ctx->imm[idx].Register.File = TGSI_FILE_IMMEDIATE;
161    ctx->imm[idx].Register.Index = info->immediate_count + idx;
162    ctx->imm[idx].Register.SwizzleX = TGSI_SWIZZLE_X;
163    ctx->imm[idx].Register.SwizzleY = TGSI_SWIZZLE_Y;
164    ctx->imm[idx].Register.SwizzleZ = TGSI_SWIZZLE_Z;
165    ctx->imm[idx].Register.SwizzleW = TGSI_SWIZZLE_W;
166 }
167 
168 static void
emit_samp(struct tgsi_transform_context * tctx,unsigned samp)169 emit_samp(struct tgsi_transform_context *tctx, unsigned samp)
170 {
171    tgsi_transform_sampler_decl(tctx, samp);
172    tgsi_transform_sampler_view_decl(tctx, samp, PIPE_TEXTURE_2D,
173                                     TGSI_RETURN_TYPE_FLOAT);
174 }
175 
176 /* Emit extra declarations we need:
177  *  + 2 TEMP to hold intermediate results
178  *  + 1 (for 2-plane YUV) or 2 (for 3-plane YUV) extra samplers per
179  *    lowered YUV sampler
180  *  + extra immediates for doing CSC
181  */
182 static void
emit_decls(struct tgsi_transform_context * tctx)183 emit_decls(struct tgsi_transform_context *tctx)
184 {
185    struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
186    struct tgsi_shader_info *info = &ctx->info;
187    unsigned mask, tempbase, i;
188    struct tgsi_full_declaration decl;
189 
190    /*
191     * Declare immediates for CSC conversion:
192     */
193 
194    /* ITU-R BT.601 conversion */
195    emit_immed(tctx, 0, 1.164,  0.000,  1.596,  0.0);
196    emit_immed(tctx, 1, 1.164, -0.392, -0.813,  0.0);
197    emit_immed(tctx, 2, 1.164,  2.017,  0.000,  0.0);
198    emit_immed(tctx, 3, 0.0625, 0.500,  0.500,  1.0);
199 
200    /*
201     * Declare extra samplers / sampler-views:
202     */
203 
204    mask = ctx->lower_nv12 | ctx->lower_iyuv;
205    while (mask) {
206       unsigned extra, y_samp = u_bit_scan(&mask);
207 
208       extra = u_bit_scan(&ctx->free_slots);
209       ctx->sampler_map[y_samp][0] = extra;
210       emit_samp(tctx, extra);
211 
212       if (ctx->lower_iyuv & (1 << y_samp)) {
213          extra = u_bit_scan(&ctx->free_slots);
214          ctx->sampler_map[y_samp][1] = extra;
215          emit_samp(tctx, extra);
216       }
217    }
218 
219    /*
220     * Declare extra temp:
221     */
222 
223    tempbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
224 
225    for (i = 0; i < 2; i++) {
226       decl = tgsi_default_full_declaration();
227       decl.Declaration.File = TGSI_FILE_TEMPORARY;
228       decl.Range.First = decl.Range.Last = tempbase + i;
229       tctx->emit_declaration(tctx, &decl);
230 
231       ctx->tmp[i].src.Register.File  = TGSI_FILE_TEMPORARY;
232       ctx->tmp[i].src.Register.Index = tempbase + i;
233       ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
234       ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
235       ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
236       ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
237 
238       ctx->tmp[i].dst.Register.File  = TGSI_FILE_TEMPORARY;
239       ctx->tmp[i].dst.Register.Index = tempbase + i;
240       ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
241    }
242 }
243 
244 /* call with YUV in tmpA.xyz */
245 static void
yuv_to_rgb(struct tgsi_transform_context * tctx,struct tgsi_full_dst_register * dst)246 yuv_to_rgb(struct tgsi_transform_context *tctx,
247            struct tgsi_full_dst_register *dst)
248 {
249    struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
250    struct tgsi_full_instruction inst;
251 
252    /*
253     * IMM[0] FLT32 { 1.164,  0.000,  1.596,  0.0 }
254     * IMM[1] FLT32 { 1.164, -0.392, -0.813,  0.0 }
255     * IMM[2] FLT32 { 1.164,  2.017,  0.000,  0.0 }
256     * IMM[3] FLT32 { 0.0625, 0.500,  0.500,  1.0 }
257     */
258 
259    /* SUB tmpA.xyz, tmpA, imm[3] */
260    inst = tgsi_default_full_instruction();
261    inst.Instruction.Opcode = TGSI_OPCODE_ADD;
262    inst.Instruction.Saturate = 0;
263    inst.Instruction.NumDstRegs = 1;
264    inst.Instruction.NumSrcRegs = 2;
265    reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
266    reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
267    reg_src(&inst.Src[1], &ctx->imm[3], SWIZ(X, Y, Z, _));
268    inst.Src[1].Register.Negate = 1;
269    tctx->emit_instruction(tctx, &inst);
270 
271    /* DP3 dst.x, tmpA, imm[0] */
272    inst = dp3_instruction();
273    reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_X);
274    reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
275    reg_src(&inst.Src[1], &ctx->imm[0], SWIZ(X, Y, Z, W));
276    tctx->emit_instruction(tctx, &inst);
277 
278    /* DP3 dst.y, tmpA, imm[1] */
279    inst = dp3_instruction();
280    reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Y);
281    reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
282    reg_src(&inst.Src[1], &ctx->imm[1], SWIZ(X, Y, Z, W));
283    tctx->emit_instruction(tctx, &inst);
284 
285    /* DP3 dst.z, tmpA, imm[2] */
286    inst = dp3_instruction();
287    reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Z);
288    reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
289    reg_src(&inst.Src[1], &ctx->imm[2], SWIZ(X, Y, Z, W));
290    tctx->emit_instruction(tctx, &inst);
291 
292    /* MOV dst.w, imm[0].x */
293    inst = mov_instruction();
294    reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_W);
295    reg_src(&inst.Src[0], &ctx->imm[3], SWIZ(_, _, _, W));
296    tctx->emit_instruction(tctx, &inst);
297 }
298 
299 static void
lower_nv12(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * originst)300 lower_nv12(struct tgsi_transform_context *tctx,
301            struct tgsi_full_instruction *originst)
302 {
303    struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
304    struct tgsi_full_instruction inst;
305    struct tgsi_full_src_register *coord = &originst->Src[0];
306    unsigned samp = originst->Src[1].Register.Index;
307 
308    /* sample Y:
309     *    TEX tempA.x, coord, texture[samp], 2D;
310     */
311    inst = tex_instruction(samp);
312    reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
313    reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
314    tctx->emit_instruction(tctx, &inst);
315 
316    /* sample UV:
317     *    TEX tempB.xy, coord, texture[sampler_map[samp][0]], 2D;
318     *    MOV tempA.yz, tempB._xy_
319     */
320    inst = tex_instruction(ctx->sampler_map[samp][0]);
321    reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XY);
322    reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
323    tctx->emit_instruction(tctx, &inst);
324 
325    inst = mov_instruction();
326    reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_YZ);
327    reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, Y, _));
328    tctx->emit_instruction(tctx, &inst);
329 
330    /* At this point, we have YUV in tempA.xyz, rest is common: */
331    yuv_to_rgb(tctx, &originst->Dst[0]);
332 }
333 
334 static void
lower_iyuv(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * originst)335 lower_iyuv(struct tgsi_transform_context *tctx,
336            struct tgsi_full_instruction *originst)
337 {
338    struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
339    struct tgsi_full_instruction inst;
340    struct tgsi_full_src_register *coord = &originst->Src[0];
341    unsigned samp = originst->Src[1].Register.Index;
342 
343    /* sample Y:
344     *    TEX tempA.x, coord, texture[samp], 2D;
345     */
346    inst = tex_instruction(samp);
347    reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
348    reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
349    tctx->emit_instruction(tctx, &inst);
350 
351    /* sample U:
352     *    TEX tempB.x, coord, texture[sampler_map[samp][0]], 2D;
353     *    MOV tempA.y, tempB._x__
354     */
355    inst = tex_instruction(ctx->sampler_map[samp][0]);
356    reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
357    reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
358    tctx->emit_instruction(tctx, &inst);
359 
360    inst = mov_instruction();
361    reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
362    reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, _, _));
363    tctx->emit_instruction(tctx, &inst);
364 
365    /* sample V:
366     *    TEX tempB.x, coord, texture[sampler_map[samp][1]], 2D;
367     *    MOV tempA.z, tempB.__x_
368     */
369    inst = tex_instruction(ctx->sampler_map[samp][1]);
370    reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
371    reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
372    tctx->emit_instruction(tctx, &inst);
373 
374    inst = mov_instruction();
375    reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
376    reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, _, X, _));
377    tctx->emit_instruction(tctx, &inst);
378 
379    /* At this point, we have YUV in tempA.xyz, rest is common: */
380    yuv_to_rgb(tctx, &originst->Dst[0]);
381 }
382 
383 static void
transform_instr(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)384 transform_instr(struct tgsi_transform_context *tctx,
385                 struct tgsi_full_instruction *inst)
386 {
387    struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
388 
389    if (!ctx->first_instruction_emitted) {
390       emit_decls(tctx);
391       ctx->first_instruction_emitted = true;
392    }
393 
394    switch (inst->Instruction.Opcode) {
395    /* TODO what other tex opcode's can be used w/ external eglimgs? */
396    case TGSI_OPCODE_TEX: {
397       unsigned samp = inst->Src[1].Register.Index;
398       if (ctx->lower_nv12 & (1 << samp)) {
399          lower_nv12(tctx, inst);
400       } else if (ctx->lower_iyuv & (1 << samp)) {
401          lower_iyuv(tctx, inst);
402       } else {
403          goto skip;
404       }
405       break;
406    }
407    default:
408    skip:
409       tctx->emit_instruction(tctx, inst);
410       return;
411    }
412 }
413 
414 extern const struct tgsi_token *
st_tgsi_lower_yuv(const struct tgsi_token * tokens,unsigned free_slots,unsigned lower_nv12,unsigned lower_iyuv)415 st_tgsi_lower_yuv(const struct tgsi_token *tokens, unsigned free_slots,
416                   unsigned lower_nv12, unsigned lower_iyuv)
417 {
418    struct tgsi_yuv_transform ctx;
419    struct tgsi_token *newtoks;
420    int newlen;
421 
422    assert(!(lower_nv12 & lower_iyuv)); /* bitmasks should be mutually exclusive */
423 
424 //   tgsi_dump(tokens, 0);
425 //   debug_printf("\n");
426 
427    memset(&ctx, 0, sizeof(ctx));
428    ctx.base.transform_instruction = transform_instr;
429    ctx.free_slots = free_slots;
430    ctx.lower_nv12 = lower_nv12;
431    ctx.lower_iyuv = lower_iyuv;
432    tgsi_scan_shader(tokens, &ctx.info);
433 
434    /* TODO better job of figuring out how many extra tokens we need..
435     * this is a pain about tgsi_transform :-/
436     */
437    newlen = tgsi_num_tokens(tokens) + 120;
438    newtoks = tgsi_alloc_tokens(newlen);
439    if (!newtoks)
440       return NULL;
441 
442    tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
443 
444 //   tgsi_dump(newtoks, 0);
445 //   debug_printf("\n");
446 
447    return newtoks;
448 }
449