1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "vk_nir_convert_ycbcr.h"
25 
26 #include "vk_format.h"
27 #include "vk_ycbcr_conversion.h"
28 
29 #include <math.h>
30 
31 static nir_def *
y_range(nir_builder * b,nir_def * y_channel,int bpc,VkSamplerYcbcrRange range)32 y_range(nir_builder *b,
33         nir_def *y_channel,
34         int bpc,
35         VkSamplerYcbcrRange range)
36 {
37    switch (range) {
38    case VK_SAMPLER_YCBCR_RANGE_ITU_FULL:
39       return y_channel;
40    case VK_SAMPLER_YCBCR_RANGE_ITU_NARROW:
41       return nir_fmul_imm(b,
42                           nir_fadd_imm(b,
43                                        nir_fmul_imm(b, y_channel,
44                                                     pow(2, bpc) - 1),
45                                        -16.0f * pow(2, bpc - 8)),
46                           1.0f / (219.0f * pow(2, bpc - 8)));
47 
48    default:
49       unreachable("missing Ycbcr range");
50       return NULL;
51    }
52 }
53 
54 static nir_def *
chroma_range(nir_builder * b,nir_def * chroma_channel,int bpc,VkSamplerYcbcrRange range)55 chroma_range(nir_builder *b,
56              nir_def *chroma_channel,
57              int bpc,
58              VkSamplerYcbcrRange range)
59 {
60    switch (range) {
61    case VK_SAMPLER_YCBCR_RANGE_ITU_FULL:
62       return nir_fadd(b, chroma_channel,
63                       nir_imm_float(b, -pow(2, bpc - 1) / (pow(2, bpc) - 1.0f)));
64    case VK_SAMPLER_YCBCR_RANGE_ITU_NARROW:
65       return nir_fmul_imm(b,
66                           nir_fadd_imm(b,
67                                        nir_fmul_imm(b, chroma_channel,
68                                                     pow(2, bpc) - 1),
69                                        -128.0f * pow(2, bpc - 8)),
70                           1.0f / (224.0f * pow(2, bpc - 8)));
71    default:
72       unreachable("missing Ycbcr range");
73       return NULL;
74    }
75 }
76 
/* A 3x4 table of NIR constants: three rows of four coefficients each.
 * Wrapped in a struct so that a pointer to a whole table can be returned
 * from a function; each row v[i] is later handed to nir_build_imm() as a
 * 4-component, 32-bit immediate.
 */
typedef struct nir_const_value_3_4 {
   nir_const_value v[3][4];
} nir_const_value_3_4;
80 
81 static const nir_const_value_3_4 *
ycbcr_model_to_rgb_matrix(VkSamplerYcbcrModelConversion model)82 ycbcr_model_to_rgb_matrix(VkSamplerYcbcrModelConversion model)
83 {
84    switch (model) {
85    case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601: {
86       static const nir_const_value_3_4 bt601 = { {
87          { { .f32 =  1.402f             }, { .f32 = 1.0f }, { .f32 =  0.0f               }, { .f32 = 0.0f } },
88          { { .f32 = -0.714136286201022f }, { .f32 = 1.0f }, { .f32 = -0.344136286201022f }, { .f32 = 0.0f } },
89          { { .f32 =  0.0f               }, { .f32 = 1.0f }, { .f32 =  1.772f             }, { .f32 = 0.0f } },
90       } };
91 
92       return &bt601;
93    }
94    case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709: {
95       static const nir_const_value_3_4 bt709 = { {
96          { { .f32 =  1.5748031496063f   }, { .f32 = 1.0f }, { .f32 =  0.0f               }, { .f32 = 0.0f } },
97          { { .f32 = -0.468125209181067f }, { .f32 = 1.0f }, { .f32 = -0.187327487470334f }, { .f32 = 0.0f } },
98          { { .f32 =  0.0f               }, { .f32 = 1.0f }, { .f32 =  1.85563184264242f  }, { .f32 = 0.0f } },
99       } };
100 
101       return &bt709;
102    }
103    case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020: {
104       static const nir_const_value_3_4 bt2020 = { {
105          { { .f32 =  1.4746f            }, { .f32 = 1.0f }, { .f32 =  0.0f               }, { .f32 = 0.0f } },
106          { { .f32 = -0.571353126843658f }, { .f32 = 1.0f }, { .f32 = -0.164553126843658f }, { .f32 = 0.0f } },
107          { { .f32 =  0.0f               }, { .f32 = 1.0f }, { .f32 =  1.8814f            }, { .f32 = 0.0f } },
108       } };
109 
110       return &bt2020;
111    }
112    default:
113       unreachable("missing Ycbcr model");
114       return NULL;
115    }
116 }
117 
118 nir_def *
nir_convert_ycbcr_to_rgb(nir_builder * b,VkSamplerYcbcrModelConversion model,VkSamplerYcbcrRange range,nir_def * raw_channels,uint32_t * bpcs)119 nir_convert_ycbcr_to_rgb(nir_builder *b,
120                          VkSamplerYcbcrModelConversion model,
121                          VkSamplerYcbcrRange range,
122                          nir_def *raw_channels,
123                          uint32_t *bpcs)
124 {
125    nir_def *expanded_channels =
126       nir_vec4(b,
127                chroma_range(b, nir_channel(b, raw_channels, 0), bpcs[0], range),
128                y_range(b, nir_channel(b, raw_channels, 1), bpcs[1], range),
129                chroma_range(b, nir_channel(b, raw_channels, 2), bpcs[2], range),
130                nir_channel(b, raw_channels, 3));
131 
132    if (model == VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY)
133       return expanded_channels;
134 
135    const nir_const_value_3_4 *conversion_matrix =
136       ycbcr_model_to_rgb_matrix(model);
137 
138    nir_def *converted_channels[] = {
139       nir_fdot(b, expanded_channels, nir_build_imm(b, 4, 32, conversion_matrix->v[0])),
140       nir_fdot(b, expanded_channels, nir_build_imm(b, 4, 32, conversion_matrix->v[1])),
141       nir_fdot(b, expanded_channels, nir_build_imm(b, 4, 32, conversion_matrix->v[2]))
142    };
143 
144    return nir_vec4(b,
145                    converted_channels[0], converted_channels[1],
146                    converted_channels[2], nir_channel(b, raw_channels, 3));
147 }
148 
/* Working state used while emitting the per-plane texture instructions that
 * replace one original texture instruction.
 */
struct ycbcr_state {
   /* Builder used to emit all new instructions. */
   nir_builder *builder;
   /* Texture size converted to float; NULL until get_texture_size() has
    * emitted the query, then reused on later calls.
    */
   nir_def *image_size;
   /* Original texture instruction whose sources and fields are copied. */
   nir_tex_instr *origin_tex;
   /* Texture deref used as the source of the size query. */
   nir_deref_instr *tex_deref;
   /* Conversion parameters (chroma offsets, reconstruction flag, ...). */
   const struct vk_ycbcr_conversion_state *conversion;
   /* Per-plane description of the format being sampled. */
   const struct vk_format_ycbcr_info *format_ycbcr_info;
};
157 
158 /* TODO: we should probably replace this with a push constant/uniform. */
159 static nir_def *
get_texture_size(struct ycbcr_state * state,nir_deref_instr * texture)160 get_texture_size(struct ycbcr_state *state, nir_deref_instr *texture)
161 {
162    if (state->image_size)
163       return state->image_size;
164 
165    nir_builder *b = state->builder;
166    const struct glsl_type *type = texture->type;
167    nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1);
168 
169    tex->op = nir_texop_txs;
170    tex->sampler_dim = glsl_get_sampler_dim(type);
171    tex->is_array = glsl_sampler_type_is_array(type);
172    tex->is_shadow = glsl_sampler_type_is_shadow(type);
173    tex->dest_type = nir_type_int32;
174 
175    tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
176                                      &texture->def);
177 
178    nir_def_init(&tex->instr, &tex->def, nir_tex_instr_dest_size(tex), 32);
179    nir_builder_instr_insert(b, &tex->instr);
180 
181    state->image_size = nir_i2f32(b, &tex->def);
182 
183    return state->image_size;
184 }
185 
186 static nir_def *
implicit_downsampled_coord(nir_builder * b,nir_def * value,nir_def * max_value,int div_scale)187 implicit_downsampled_coord(nir_builder *b,
188                            nir_def *value,
189                            nir_def *max_value,
190                            int div_scale)
191 {
192    return nir_fadd(b,
193                    value,
194                    nir_frcp(b,
195                             nir_fmul(b,
196                                      nir_imm_float(b, div_scale),
197                                      max_value)));
198 }
199 
200 static nir_def *
implicit_downsampled_coords(struct ycbcr_state * state,nir_def * old_coords,const struct vk_format_ycbcr_plane * format_plane)201 implicit_downsampled_coords(struct ycbcr_state *state,
202                             nir_def *old_coords,
203                             const struct vk_format_ycbcr_plane *format_plane)
204 {
205    nir_builder *b = state->builder;
206    const struct vk_ycbcr_conversion_state *conversion = state->conversion;
207    nir_def *image_size = get_texture_size(state, state->tex_deref);
208    nir_def *comp[4] = { NULL, };
209    int c;
210 
211    for (c = 0; c < ARRAY_SIZE(conversion->chroma_offsets); c++) {
212       if (format_plane->denominator_scales[c] > 1 &&
213           conversion->chroma_offsets[c] == VK_CHROMA_LOCATION_COSITED_EVEN) {
214          comp[c] = implicit_downsampled_coord(b,
215                                               nir_channel(b, old_coords, c),
216                                               nir_channel(b, image_size, c),
217                                               format_plane->denominator_scales[c]);
218       } else {
219          comp[c] = nir_channel(b, old_coords, c);
220       }
221    }
222 
223    /* Leave other coordinates untouched */
224    for (; c < old_coords->num_components; c++)
225       comp[c] = nir_channel(b, old_coords, c);
226 
227    return nir_vec(b, comp, old_coords->num_components);
228 }
229 
230 static nir_def *
create_plane_tex_instr_implicit(struct ycbcr_state * state,uint32_t plane)231 create_plane_tex_instr_implicit(struct ycbcr_state *state,
232                                 uint32_t plane)
233 {
234    nir_builder *b = state->builder;
235    const struct vk_ycbcr_conversion_state *conversion = state->conversion;
236    const struct vk_format_ycbcr_plane *format_plane =
237       &state->format_ycbcr_info->planes[plane];
238    nir_tex_instr *old_tex = state->origin_tex;
239    nir_tex_instr *tex = nir_tex_instr_create(b->shader, old_tex->num_srcs + 1);
240 
241    for (uint32_t i = 0; i < old_tex->num_srcs; i++) {
242       tex->src[i].src_type = old_tex->src[i].src_type;
243 
244       switch (old_tex->src[i].src_type) {
245       case nir_tex_src_coord:
246          if (format_plane->has_chroma && conversion->chroma_reconstruction) {
247             tex->src[i].src =
248                nir_src_for_ssa(implicit_downsampled_coords(state,
249                                                            old_tex->src[i].src.ssa,
250                                                            format_plane));
251             break;
252          }
253          FALLTHROUGH;
254       default:
255          tex->src[i].src = nir_src_for_ssa(old_tex->src[i].src.ssa);
256          break;
257       }
258    }
259    tex->src[tex->num_srcs - 1] = nir_tex_src_for_ssa(nir_tex_src_plane,
260                                                      nir_imm_int(b, plane));
261    tex->sampler_dim = old_tex->sampler_dim;
262    tex->dest_type = old_tex->dest_type;
263 
264    tex->op = old_tex->op;
265    tex->coord_components = old_tex->coord_components;
266    tex->is_new_style_shadow = old_tex->is_new_style_shadow;
267    tex->component = old_tex->component;
268 
269    tex->texture_index = old_tex->texture_index;
270    tex->sampler_index = old_tex->sampler_index;
271    tex->is_array = old_tex->is_array;
272 
273    nir_def_init(&tex->instr, &tex->def, old_tex->def.num_components,
274                 old_tex->def.bit_size);
275    nir_builder_instr_insert(b, &tex->instr);
276 
277    return &tex->def;
278 }
279 
280 static unsigned
swizzle_to_component(VkComponentSwizzle swizzle)281 swizzle_to_component(VkComponentSwizzle swizzle)
282 {
283    switch (swizzle) {
284    case VK_COMPONENT_SWIZZLE_R:
285       return 0;
286    case VK_COMPONENT_SWIZZLE_G:
287       return 1;
288    case VK_COMPONENT_SWIZZLE_B:
289       return 2;
290    case VK_COMPONENT_SWIZZLE_A:
291       return 3;
292    default:
293       unreachable("invalid channel");
294       return 0;
295    }
296 }
297 
/* Data handed to lower_ycbcr_tex_instr() for the whole pass. */
struct lower_ycbcr_tex_state {
   /* Lookup callback; called with (cb_data, set, binding, array_index) and
    * expected to return the conversion state for that descriptor, or NULL
    * when the instruction should be left alone.
    */
   nir_vk_ycbcr_conversion_lookup_cb cb;
   /* Opaque pointer forwarded as the first argument of cb. */
   const void *cb_data;
};
302 
303 static bool
lower_ycbcr_tex_instr(nir_builder * b,nir_instr * instr,void * _state)304 lower_ycbcr_tex_instr(nir_builder *b, nir_instr *instr, void *_state)
305 {
306    const struct lower_ycbcr_tex_state *state = _state;
307 
308    if (instr->type != nir_instr_type_tex)
309       return false;
310 
311    nir_tex_instr *tex = nir_instr_as_tex(instr);
312 
313    /* For the following instructions, we don't apply any change and let the
314     * instruction apply to the first plane.
315     */
316    if (tex->op == nir_texop_txs ||
317        tex->op == nir_texop_query_levels ||
318        tex->op == nir_texop_lod)
319       return false;
320 
321    int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
322    assert(deref_src_idx >= 0);
323    nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
324 
325    nir_variable *var = nir_deref_instr_get_variable(deref);
326    uint32_t set = var->data.descriptor_set;
327    uint32_t binding = var->data.binding;
328 
329    assert(tex->texture_index == 0);
330    unsigned array_index = 0;
331    if (deref->deref_type != nir_deref_type_var) {
332       assert(deref->deref_type == nir_deref_type_array);
333       if (!nir_src_is_const(deref->arr.index))
334          return false;
335       array_index = nir_src_as_uint(deref->arr.index);
336    }
337 
338    const struct vk_ycbcr_conversion_state *conversion =
339       state->cb(state->cb_data, set, binding, array_index);
340    if (conversion == NULL)
341       return false;
342 
343    const struct vk_format_ycbcr_info *format_ycbcr_info =
344       vk_format_get_ycbcr_info(conversion->format);
345 
346    /* This can happen if the driver hasn't done a good job of filtering on
347     * sampler creation and lets through a VkYcbcrConversion object which isn't
348     * actually YCbCr.  We're supposed to ignore those.
349     */
350    if (format_ycbcr_info == NULL)
351       return false;
352 
353    b->cursor = nir_before_instr(&tex->instr);
354 
355    VkFormat y_format = VK_FORMAT_UNDEFINED;
356    for (uint32_t p = 0; p < format_ycbcr_info->n_planes; p++) {
357       if (!format_ycbcr_info->planes[p].has_chroma)
358          y_format = format_ycbcr_info->planes[p].format;
359    }
360    assert(y_format != VK_FORMAT_UNDEFINED);
361    const struct util_format_description *y_format_desc =
362       util_format_description(vk_format_to_pipe_format(y_format));
363    uint8_t y_bpc = y_format_desc->channel[0].size;
364 
365    /* |ycbcr_comp| holds components in the order : Cr-Y-Cb */
366    nir_def *zero = nir_imm_float(b, 0.0f);
367    nir_def *one = nir_imm_float(b, 1.0f);
368    /* Use extra 2 channels for following swizzle */
369    nir_def *ycbcr_comp[5] = { zero, zero, zero, one, zero };
370 
371    uint8_t ycbcr_bpcs[5];
372    memset(ycbcr_bpcs, y_bpc, sizeof(ycbcr_bpcs));
373 
374    /* Go through all the planes and gather the samples into a |ycbcr_comp|
375     * while applying a swizzle required by the spec:
376     *
377     *    R, G, B should respectively map to Cr, Y, Cb
378     */
379    for (uint32_t p = 0; p < format_ycbcr_info->n_planes; p++) {
380       const struct vk_format_ycbcr_plane *format_plane =
381          &format_ycbcr_info->planes[p];
382 
383       struct ycbcr_state tex_state = {
384          .builder = b,
385          .origin_tex = tex,
386          .tex_deref = deref,
387          .conversion = conversion,
388          .format_ycbcr_info = format_ycbcr_info,
389       };
390       nir_def *plane_sample = create_plane_tex_instr_implicit(&tex_state, p);
391 
392       for (uint32_t pc = 0; pc < 4; pc++) {
393          VkComponentSwizzle ycbcr_swizzle = format_plane->ycbcr_swizzle[pc];
394          if (ycbcr_swizzle == VK_COMPONENT_SWIZZLE_ZERO)
395             continue;
396 
397          unsigned ycbcr_component = swizzle_to_component(ycbcr_swizzle);
398          ycbcr_comp[ycbcr_component] = nir_channel(b, plane_sample, pc);
399 
400          /* Also compute the number of bits for each component. */
401          const struct util_format_description *plane_format_desc =
402             util_format_description(vk_format_to_pipe_format(format_plane->format));
403          ycbcr_bpcs[ycbcr_component] = plane_format_desc->channel[pc].size;
404       }
405    }
406 
407    /* Now remaps components to the order specified by the conversion. */
408    nir_def *swizzled_comp[4] = { NULL, };
409    uint32_t swizzled_bpcs[4] = { 0, };
410 
411    for (uint32_t i = 0; i < ARRAY_SIZE(conversion->mapping); i++) {
412       /* Maps to components in |ycbcr_comp| */
413       static const uint32_t swizzle_mapping[] = {
414          [VK_COMPONENT_SWIZZLE_ZERO] = 4,
415          [VK_COMPONENT_SWIZZLE_ONE]  = 3,
416          [VK_COMPONENT_SWIZZLE_R]    = 0,
417          [VK_COMPONENT_SWIZZLE_G]    = 1,
418          [VK_COMPONENT_SWIZZLE_B]    = 2,
419          [VK_COMPONENT_SWIZZLE_A]    = 3,
420       };
421       const VkComponentSwizzle m = conversion->mapping[i];
422 
423       if (m == VK_COMPONENT_SWIZZLE_IDENTITY) {
424          swizzled_comp[i] = ycbcr_comp[i];
425          swizzled_bpcs[i] = ycbcr_bpcs[i];
426       } else {
427          swizzled_comp[i] = ycbcr_comp[swizzle_mapping[m]];
428          swizzled_bpcs[i] = ycbcr_bpcs[swizzle_mapping[m]];
429       }
430    }
431 
432    nir_def *result = nir_vec(b, swizzled_comp, 4);
433    if (conversion->ycbcr_model != VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) {
434       result = nir_convert_ycbcr_to_rgb(b, conversion->ycbcr_model,
435                                            conversion->ycbcr_range,
436                                            result,
437                                            swizzled_bpcs);
438    }
439 
440    nir_def_rewrite_uses(&tex->def, result);
441    nir_instr_remove(&tex->instr);
442 
443    return true;
444 }
445 
/* Run the YCbCr texture lowering over every instruction of the shader.
 *
 * cb is called with cb_data to look up the conversion for each sampled
 * descriptor.  The return value is whatever nir_shader_instructions_pass()
 * reports for the per-instruction callback.
 */
bool nir_vk_lower_ycbcr_tex(nir_shader *nir,
                            nir_vk_ycbcr_conversion_lookup_cb cb,
                            const void *cb_data)
{
   struct lower_ycbcr_tex_state state;

   state.cb = cb;
   state.cb_data = cb_data;

   const bool progress =
      nir_shader_instructions_pass(nir, lower_ycbcr_tex_instr,
                                   nir_metadata_block_index |
                                   nir_metadata_dominance,
                                   &state);

   return progress;
}
460