1 /**************************************************************************
2  *
3  * Copyright 2010 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  *
26  **************************************************************************/
27 
28 
29 #include "lp_bld_type.h"
30 #include "lp_bld_arit.h"
31 #include "lp_bld_const.h"
32 #include "lp_bld_swizzle.h"
33 #include "lp_bld_quad.h"
34 
35 
36 static const unsigned char
37 swizzle_left[4] = {
38    LP_BLD_QUAD_TOP_LEFT,     LP_BLD_QUAD_TOP_LEFT,
39    LP_BLD_QUAD_BOTTOM_LEFT,  LP_BLD_QUAD_BOTTOM_LEFT
40 };
41 
42 static const unsigned char
43 swizzle_right[4] = {
44    LP_BLD_QUAD_TOP_RIGHT,    LP_BLD_QUAD_TOP_RIGHT,
45    LP_BLD_QUAD_BOTTOM_RIGHT, LP_BLD_QUAD_BOTTOM_RIGHT
46 };
47 
48 static const unsigned char
49 swizzle_top[4] = {
50    LP_BLD_QUAD_TOP_LEFT,     LP_BLD_QUAD_TOP_RIGHT,
51    LP_BLD_QUAD_TOP_LEFT,     LP_BLD_QUAD_TOP_RIGHT
52 };
53 
54 static const unsigned char
55 swizzle_bottom[4] = {
56    LP_BLD_QUAD_BOTTOM_LEFT,  LP_BLD_QUAD_BOTTOM_RIGHT,
57    LP_BLD_QUAD_BOTTOM_LEFT,  LP_BLD_QUAD_BOTTOM_RIGHT
58 };
59 
60 
61 LLVMValueRef
lp_build_ddx(struct lp_build_context * bld,LLVMValueRef a)62 lp_build_ddx(struct lp_build_context *bld,
63              LLVMValueRef a)
64 {
65    LLVMValueRef a_left  = lp_build_swizzle_aos(bld, a, swizzle_left);
66    LLVMValueRef a_right = lp_build_swizzle_aos(bld, a, swizzle_right);
67    return lp_build_sub(bld, a_right, a_left);
68 }
69 
70 
71 LLVMValueRef
lp_build_ddy(struct lp_build_context * bld,LLVMValueRef a)72 lp_build_ddy(struct lp_build_context *bld,
73              LLVMValueRef a)
74 {
75    LLVMValueRef a_top    = lp_build_swizzle_aos(bld, a, swizzle_top);
76    LLVMValueRef a_bottom = lp_build_swizzle_aos(bld, a, swizzle_bottom);
77    return lp_build_sub(bld, a_bottom, a_top);
78 }
79 
80 /*
81  * To be able to handle multiple quads at once in texture sampling and
82  * do lod calculations per quad, it is necessary to get the per-quad
83  * derivatives into the lp_build_rho function.
84  * For 8-wide vectors the packed derivative values for 3 coords would
85  * look like this, this scales to a arbitrary (multiple of 4) vector size:
86  * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy
87  * dr1dx dr1dy _____ _____ dr2dx dr2dy _____ _____
88  * The second vector will be unused for 1d and 2d textures.
89  */
90 LLVMValueRef
lp_build_packed_ddx_ddy_onecoord(struct lp_build_context * bld,LLVMValueRef a)91 lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
92                                  LLVMValueRef a)
93 {
94    struct gallivm_state *gallivm = bld->gallivm;
95    LLVMBuilderRef builder = gallivm->builder;
96    LLVMValueRef vec1, vec2;
97 
98    /* same packing as _twocoord, but can use aos swizzle helper */
99 
100    /*
101     * XXX could make swizzle1 a noop swizzle by using right top/bottom
102     * pair for ddy
103     */
104    static const unsigned char swizzle1[] = {
105       LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_LEFT,
106       LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
107    };
108    static const unsigned char swizzle2[] = {
109       LP_BLD_QUAD_TOP_RIGHT, LP_BLD_QUAD_BOTTOM_LEFT,
110       LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
111    };
112 
113    vec1 = lp_build_swizzle_aos(bld, a, swizzle1);
114    vec2 = lp_build_swizzle_aos(bld, a, swizzle2);
115 
116    if (bld->type.floating)
117       return LLVMBuildFSub(builder, vec2, vec1, "ddxddy");
118    else
119       return LLVMBuildSub(builder, vec2, vec1, "ddxddy");
120 }
121 
122 
123 LLVMValueRef
lp_build_packed_ddx_ddy_twocoord(struct lp_build_context * bld,LLVMValueRef a,LLVMValueRef b)124 lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld,
125                                  LLVMValueRef a, LLVMValueRef b)
126 {
127    struct gallivm_state *gallivm = bld->gallivm;
128    LLVMBuilderRef builder = gallivm->builder;
129    LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH/4];
130    LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH/4];
131    LLVMValueRef vec1, vec2;
132    unsigned length, num_quads, i;
133 
134    /* XXX: do hsub version */
135    length = bld->type.length;
136    num_quads = length / 4;
137    for (i = 0; i < num_quads; i++) {
138       unsigned s1 = 4 * i;
139       unsigned s2 = 4 * i + length;
140       shuffles1[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1);
141       shuffles1[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1);
142       shuffles1[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2);
143       shuffles1[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2);
144       shuffles2[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s1);
145       shuffles2[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s1);
146       shuffles2[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s2);
147       shuffles2[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s2);
148    }
149    vec1 = LLVMBuildShuffleVector(builder, a, b,
150                                  LLVMConstVector(shuffles1, length), "");
151    vec2 = LLVMBuildShuffleVector(builder, a, b,
152                                  LLVMConstVector(shuffles2, length), "");
153    if (bld->type.floating)
154       return LLVMBuildFSub(builder, vec2, vec1, "ddxddyddxddy");
155    else
156       return LLVMBuildSub(builder, vec2, vec1, "ddxddyddxddy");
157 }
158 
159