1 /**************************************************************************
2  *
3  * Copyright 2010 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  *
26  **************************************************************************/
27 
28 
29 /**
30  * @file
31  * YUV pixel format manipulation.
32  *
33  * @author Jose Fonseca <jfonseca@vmware.com>
34  */
35 
36 
37 #include "util/u_format.h"
38 #include "util/u_cpu_detect.h"
39 
40 #include "lp_bld_arit.h"
41 #include "lp_bld_type.h"
42 #include "lp_bld_const.h"
43 #include "lp_bld_conv.h"
44 #include "lp_bld_gather.h"
45 #include "lp_bld_format.h"
46 #include "lp_bld_init.h"
47 #include "lp_bld_logic.h"
48 
49 /**
50  * Extract Y, U, V channels from packed UYVY.
51  * @param packed  is a <n x i32> vector with the packed UYVY blocks
52  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
53  */
54 static void
uyvy_to_yuv_soa(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i,LLVMValueRef * y,LLVMValueRef * u,LLVMValueRef * v)55 uyvy_to_yuv_soa(struct gallivm_state *gallivm,
56                 unsigned n,
57                 LLVMValueRef packed,
58                 LLVMValueRef i,
59                 LLVMValueRef *y,
60                 LLVMValueRef *u,
61                 LLVMValueRef *v)
62 {
63    LLVMBuilderRef builder = gallivm->builder;
64    struct lp_type type;
65    LLVMValueRef mask;
66 
67    memset(&type, 0, sizeof type);
68    type.width = 32;
69    type.length = n;
70 
71    assert(lp_check_value(type, packed));
72    assert(lp_check_value(type, i));
73 
74    /*
75     * y = (uyvy >> (16*i + 8)) & 0xff
76     * u = (uyvy        ) & 0xff
77     * v = (uyvy >> 16  ) & 0xff
78     */
79 
80 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
81    /*
82     * Avoid shift with per-element count.
83     * No support on x86, gets translated to roughly 5 instructions
84     * per element. Didn't measure performance but cuts shader size
85     * by quite a bit (less difference if cpu has no sse4.1 support).
86     */
87    if (util_cpu_caps.has_sse2 && n > 1) {
88       LLVMValueRef sel, tmp, tmp2;
89       struct lp_build_context bld32;
90 
91       lp_build_context_init(&bld32, gallivm, type);
92 
93       tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
94       tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), "");
95       sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
96       *y = lp_build_select(&bld32, sel, tmp, tmp2);
97    } else
98 #endif
99    {
100       LLVMValueRef shift;
101       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
102       shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), "");
103       *y = LLVMBuildLShr(builder, packed, shift, "");
104    }
105 
106    *u = packed;
107    *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
108 
109    mask = lp_build_const_int_vec(gallivm, type, 0xff);
110 
111    *y = LLVMBuildAnd(builder, *y, mask, "y");
112    *u = LLVMBuildAnd(builder, *u, mask, "u");
113    *v = LLVMBuildAnd(builder, *v, mask, "v");
114 }
115 
116 
117 /**
118  * Extract Y, U, V channels from packed YUYV.
119  * @param packed  is a <n x i32> vector with the packed YUYV blocks
120  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
121  */
122 static void
yuyv_to_yuv_soa(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i,LLVMValueRef * y,LLVMValueRef * u,LLVMValueRef * v)123 yuyv_to_yuv_soa(struct gallivm_state *gallivm,
124                 unsigned n,
125                 LLVMValueRef packed,
126                 LLVMValueRef i,
127                 LLVMValueRef *y,
128                 LLVMValueRef *u,
129                 LLVMValueRef *v)
130 {
131    LLVMBuilderRef builder = gallivm->builder;
132    struct lp_type type;
133    LLVMValueRef mask;
134 
135    memset(&type, 0, sizeof type);
136    type.width = 32;
137    type.length = n;
138 
139    assert(lp_check_value(type, packed));
140    assert(lp_check_value(type, i));
141 
142    /*
143     * y = (yuyv >> 16*i) & 0xff
144     * u = (yuyv >> 8   ) & 0xff
145     * v = (yuyv >> 24  ) & 0xff
146     */
147 
148 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
149    /*
150     * Avoid shift with per-element count.
151     * No support on x86, gets translated to roughly 5 instructions
152     * per element. Didn't measure performance but cuts shader size
153     * by quite a bit (less difference if cpu has no sse4.1 support).
154     */
155    if (util_cpu_caps.has_sse2 && n > 1) {
156       LLVMValueRef sel, tmp;
157       struct lp_build_context bld32;
158 
159       lp_build_context_init(&bld32, gallivm, type);
160 
161       tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
162       sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
163        *y = lp_build_select(&bld32, sel, packed, tmp);
164    } else
165 #endif
166    {
167       LLVMValueRef shift;
168       shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
169       *y = LLVMBuildLShr(builder, packed, shift, "");
170    }
171 
172    *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
173    *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
174 
175    mask = lp_build_const_int_vec(gallivm, type, 0xff);
176 
177    *y = LLVMBuildAnd(builder, *y, mask, "y");
178    *u = LLVMBuildAnd(builder, *u, mask, "u");
179    *v = LLVMBuildAnd(builder, *v, mask, "v");
180 }
181 
182 
183 static INLINE void
yuv_to_rgb_soa(struct gallivm_state * gallivm,unsigned n,LLVMValueRef y,LLVMValueRef u,LLVMValueRef v,LLVMValueRef * r,LLVMValueRef * g,LLVMValueRef * b)184 yuv_to_rgb_soa(struct gallivm_state *gallivm,
185                unsigned n,
186                LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
187                LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
188 {
189    LLVMBuilderRef builder = gallivm->builder;
190    struct lp_type type;
191    struct lp_build_context bld;
192 
193    LLVMValueRef c0;
194    LLVMValueRef c8;
195    LLVMValueRef c16;
196    LLVMValueRef c128;
197    LLVMValueRef c255;
198 
199    LLVMValueRef cy;
200    LLVMValueRef cug;
201    LLVMValueRef cub;
202    LLVMValueRef cvr;
203    LLVMValueRef cvg;
204 
205    memset(&type, 0, sizeof type);
206    type.sign = TRUE;
207    type.width = 32;
208    type.length = n;
209 
210    lp_build_context_init(&bld, gallivm, type);
211 
212    assert(lp_check_value(type, y));
213    assert(lp_check_value(type, u));
214    assert(lp_check_value(type, v));
215 
216    /*
217     * Constants
218     */
219 
220    c0   = lp_build_const_int_vec(gallivm, type,   0);
221    c8   = lp_build_const_int_vec(gallivm, type,   8);
222    c16  = lp_build_const_int_vec(gallivm, type,  16);
223    c128 = lp_build_const_int_vec(gallivm, type, 128);
224    c255 = lp_build_const_int_vec(gallivm, type, 255);
225 
226    cy  = lp_build_const_int_vec(gallivm, type,  298);
227    cug = lp_build_const_int_vec(gallivm, type, -100);
228    cub = lp_build_const_int_vec(gallivm, type,  516);
229    cvr = lp_build_const_int_vec(gallivm, type,  409);
230    cvg = lp_build_const_int_vec(gallivm, type, -208);
231 
232    /*
233     *  y -= 16;
234     *  u -= 128;
235     *  v -= 128;
236     */
237 
238    y = LLVMBuildSub(builder, y, c16, "");
239    u = LLVMBuildSub(builder, u, c128, "");
240    v = LLVMBuildSub(builder, v, c128, "");
241 
242    /*
243     * r = 298 * _y            + 409 * _v + 128;
244     * g = 298 * _y - 100 * _u - 208 * _v + 128;
245     * b = 298 * _y + 516 * _u            + 128;
246     */
247 
248    y = LLVMBuildMul(builder, y, cy, "");
249    y = LLVMBuildAdd(builder, y, c128, "");
250 
251    *r = LLVMBuildMul(builder, v, cvr, "");
252    *g = LLVMBuildAdd(builder,
253                      LLVMBuildMul(builder, u, cug, ""),
254                      LLVMBuildMul(builder, v, cvg, ""),
255                      "");
256    *b = LLVMBuildMul(builder, u, cub, "");
257 
258    *r = LLVMBuildAdd(builder, *r, y, "");
259    *g = LLVMBuildAdd(builder, *g, y, "");
260    *b = LLVMBuildAdd(builder, *b, y, "");
261 
262    /*
263     * r >>= 8;
264     * g >>= 8;
265     * b >>= 8;
266     */
267 
268    *r = LLVMBuildAShr(builder, *r, c8, "r");
269    *g = LLVMBuildAShr(builder, *g, c8, "g");
270    *b = LLVMBuildAShr(builder, *b, c8, "b");
271 
272    /*
273     * Clamp
274     */
275 
276    *r = lp_build_clamp(&bld, *r, c0, c255);
277    *g = lp_build_clamp(&bld, *g, c0, c255);
278    *b = lp_build_clamp(&bld, *b, c0, c255);
279 }
280 
281 
282 static LLVMValueRef
rgb_to_rgba_aos(struct gallivm_state * gallivm,unsigned n,LLVMValueRef r,LLVMValueRef g,LLVMValueRef b)283 rgb_to_rgba_aos(struct gallivm_state *gallivm,
284                 unsigned n,
285                 LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
286 {
287    LLVMBuilderRef builder = gallivm->builder;
288    struct lp_type type;
289    LLVMValueRef a;
290    LLVMValueRef rgba;
291 
292    memset(&type, 0, sizeof type);
293    type.sign = TRUE;
294    type.width = 32;
295    type.length = n;
296 
297    assert(lp_check_value(type, r));
298    assert(lp_check_value(type, g));
299    assert(lp_check_value(type, b));
300 
301    /*
302     * Make a 4 x unorm8 vector
303     */
304 
305    r = r;
306    g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), "");
307    b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), "");
308    a = lp_build_const_int_vec(gallivm, type, 0xff000000);
309 
310    rgba = r;
311    rgba = LLVMBuildOr(builder, rgba, g, "");
312    rgba = LLVMBuildOr(builder, rgba, b, "");
313    rgba = LLVMBuildOr(builder, rgba, a, "");
314 
315    rgba = LLVMBuildBitCast(builder, rgba,
316                            LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), "");
317 
318    return rgba;
319 }
320 
321 
322 /**
323  * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
324  */
325 static LLVMValueRef
uyvy_to_rgba_aos(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i)326 uyvy_to_rgba_aos(struct gallivm_state *gallivm,
327                  unsigned n,
328                  LLVMValueRef packed,
329                  LLVMValueRef i)
330 {
331    LLVMValueRef y, u, v;
332    LLVMValueRef r, g, b;
333    LLVMValueRef rgba;
334 
335    uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
336    yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
337    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
338 
339    return rgba;
340 }
341 
342 
343 /**
344  * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
345  */
346 static LLVMValueRef
yuyv_to_rgba_aos(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i)347 yuyv_to_rgba_aos(struct gallivm_state *gallivm,
348                  unsigned n,
349                  LLVMValueRef packed,
350                  LLVMValueRef i)
351 {
352    LLVMValueRef y, u, v;
353    LLVMValueRef r, g, b;
354    LLVMValueRef rgba;
355 
356    yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
357    yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
358    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
359 
360    return rgba;
361 }
362 
363 
364 /**
365  * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
366  */
367 static LLVMValueRef
rgbg_to_rgba_aos(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i)368 rgbg_to_rgba_aos(struct gallivm_state *gallivm,
369                  unsigned n,
370                  LLVMValueRef packed,
371                  LLVMValueRef i)
372 {
373    LLVMValueRef r, g, b;
374    LLVMValueRef rgba;
375 
376    uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
377    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
378 
379    return rgba;
380 }
381 
382 
383 /**
384  * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
385  */
386 static LLVMValueRef
grgb_to_rgba_aos(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i)387 grgb_to_rgba_aos(struct gallivm_state *gallivm,
388                  unsigned n,
389                  LLVMValueRef packed,
390                  LLVMValueRef i)
391 {
392    LLVMValueRef r, g, b;
393    LLVMValueRef rgba;
394 
395    yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
396    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
397 
398    return rgba;
399 }
400 
401 /**
402  * Convert from <n x i32> packed GR_BR to <4n x i8> RGBA AoS
403  */
404 static LLVMValueRef
grbr_to_rgba_aos(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i)405 grbr_to_rgba_aos(struct gallivm_state *gallivm,
406                  unsigned n,
407                  LLVMValueRef packed,
408                  LLVMValueRef i)
409 {
410    LLVMValueRef r, g, b;
411    LLVMValueRef rgba;
412 
413    uyvy_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
414    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
415 
416    return rgba;
417 }
418 
419 
420 /**
421  * Convert from <n x i32> packed RG_RB to <4n x i8> RGBA AoS
422  */
423 static LLVMValueRef
rgrb_to_rgba_aos(struct gallivm_state * gallivm,unsigned n,LLVMValueRef packed,LLVMValueRef i)424 rgrb_to_rgba_aos(struct gallivm_state *gallivm,
425                  unsigned n,
426                  LLVMValueRef packed,
427                  LLVMValueRef i)
428 {
429    LLVMValueRef r, g, b;
430    LLVMValueRef rgba;
431 
432    yuyv_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
433    rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
434 
435    return rgba;
436 }
437 
438 /**
439  * @param n  is the number of pixels processed
440  * @param packed  is a <n x i32> vector with the packed YUYV blocks
441  * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
442  * @return  a <4*n x i8> vector with the pixel RGBA values in AoS
443  */
444 LLVMValueRef
lp_build_fetch_subsampled_rgba_aos(struct gallivm_state * gallivm,const struct util_format_description * format_desc,unsigned n,LLVMValueRef base_ptr,LLVMValueRef offset,LLVMValueRef i,LLVMValueRef j)445 lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
446                                    const struct util_format_description *format_desc,
447                                    unsigned n,
448                                    LLVMValueRef base_ptr,
449                                    LLVMValueRef offset,
450                                    LLVMValueRef i,
451                                    LLVMValueRef j)
452 {
453    LLVMValueRef packed;
454    LLVMValueRef rgba;
455 
456    assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
457    assert(format_desc->block.bits == 32);
458    assert(format_desc->block.width == 2);
459    assert(format_desc->block.height == 1);
460 
461    packed = lp_build_gather(gallivm, n, 32, 32, base_ptr, offset);
462 
463    (void)j;
464 
465    switch (format_desc->format) {
466    case PIPE_FORMAT_UYVY:
467       rgba = uyvy_to_rgba_aos(gallivm, n, packed, i);
468       break;
469    case PIPE_FORMAT_YUYV:
470       rgba = yuyv_to_rgba_aos(gallivm, n, packed, i);
471       break;
472    case PIPE_FORMAT_R8G8_B8G8_UNORM:
473       rgba = rgbg_to_rgba_aos(gallivm, n, packed, i);
474       break;
475    case PIPE_FORMAT_G8R8_G8B8_UNORM:
476       rgba = grgb_to_rgba_aos(gallivm, n, packed, i);
477       break;
478    case PIPE_FORMAT_G8R8_B8R8_UNORM:
479       rgba = grbr_to_rgba_aos(gallivm, n, packed, i);
480       break;
481    case PIPE_FORMAT_R8G8_R8B8_UNORM:
482       rgba = rgrb_to_rgba_aos(gallivm, n, packed, i);
483       break;
484    default:
485       assert(0);
486       rgba =  LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n));
487       break;
488    }
489 
490    return rgba;
491 }
492 
493