1 /**************************************************************************
2  *
3  * Copyright 2015 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 #include "lp_bld_format.h"
29 #include "lp_bld_type.h"
30 #include "lp_bld_struct.h"
31 #include "lp_bld_const.h"
32 #include "lp_bld_flow.h"
33 #include "lp_bld_swizzle.h"
34 
35 #include "util/u_math.h"
36 
37 
38 /**
39  * @file
40  * Complex block-compression based formats are handled here by using a cache,
41  * so re-decoding of every pixel is not required.
42  * Especially for bilinear filtering, texel reuse is very high hence even
43  * a small cache helps.
44  * The elements in the cache are the decoded blocks - currently things
45  * are restricted to formats which are 4x4 block based, and the decoded
46  * texels must fit into 4x8 bits.
47  * The cache is direct mapped so hitrates aren't all that great and cache
48  * thrashing could happen.
49  *
50  * @author Roland Scheidegger <sroland@vmware.com>
51  */
52 
53 
54 #if LP_BUILD_FORMAT_CACHE_DEBUG
55 static void
update_cache_access(struct gallivm_state * gallivm,LLVMValueRef ptr,unsigned count,unsigned index)56 update_cache_access(struct gallivm_state *gallivm,
57                     LLVMValueRef ptr,
58                     unsigned count,
59                     unsigned index)
60 {
61    LLVMBuilderRef builder = gallivm->builder;
62    LLVMValueRef member_ptr, cache_access;
63 
64    assert(index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL ||
65           index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
66 
67    member_ptr = lp_build_struct_get_ptr(gallivm, ptr, index, "");
68    cache_access = LLVMBuildLoad(builder, member_ptr, "cache_access");
69    cache_access = LLVMBuildAdd(builder, cache_access,
70                                LLVMConstInt(LLVMInt64TypeInContext(gallivm->context),
71                                                                    count, 0), "");
72    LLVMBuildStore(builder, cache_access, member_ptr);
73 }
74 #endif
75 
76 
77 static void
store_cached_block(struct gallivm_state * gallivm,LLVMValueRef * col,LLVMValueRef tag_value,LLVMValueRef hash_index,LLVMValueRef cache)78 store_cached_block(struct gallivm_state *gallivm,
79                    LLVMValueRef *col,
80                    LLVMValueRef tag_value,
81                    LLVMValueRef hash_index,
82                    LLVMValueRef cache)
83 {
84    LLVMBuilderRef builder = gallivm->builder;
85    LLVMValueRef ptr, indices[3];
86    LLVMTypeRef type_ptr4x32;
87    unsigned count;
88 
89    type_ptr4x32 = LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), 0);
90    indices[0] = lp_build_const_int32(gallivm, 0);
91    indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS);
92    indices[2] = hash_index;
93    ptr = LLVMBuildGEP(builder, cache, indices, ARRAY_SIZE(indices), "");
94    LLVMBuildStore(builder, tag_value, ptr);
95 
96    indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA);
97    hash_index = LLVMBuildMul(builder, hash_index,
98                              lp_build_const_int32(gallivm, 16), "");
99    for (count = 0; count < 4; count++) {
100       indices[2] = hash_index;
101       ptr = LLVMBuildGEP(builder, cache, indices, ARRAY_SIZE(indices), "");
102       ptr = LLVMBuildBitCast(builder, ptr, type_ptr4x32, "");
103       LLVMBuildStore(builder, col[count], ptr);
104       hash_index = LLVMBuildAdd(builder, hash_index,
105                                 lp_build_const_int32(gallivm, 4), "");
106    }
107 }
108 
109 
110 static LLVMValueRef
lookup_cached_pixel(struct gallivm_state * gallivm,LLVMValueRef ptr,LLVMValueRef index)111 lookup_cached_pixel(struct gallivm_state *gallivm,
112                     LLVMValueRef ptr,
113                     LLVMValueRef index)
114 {
115    LLVMBuilderRef builder = gallivm->builder;
116    LLVMValueRef member_ptr, indices[3];
117 
118    indices[0] = lp_build_const_int32(gallivm, 0);
119    indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA);
120    indices[2] = index;
121    member_ptr = LLVMBuildGEP(builder, ptr, indices, ARRAY_SIZE(indices), "");
122    return LLVMBuildLoad(builder, member_ptr, "cache_data");
123 }
124 
125 
126 static LLVMValueRef
lookup_tag_data(struct gallivm_state * gallivm,LLVMValueRef ptr,LLVMValueRef index)127 lookup_tag_data(struct gallivm_state *gallivm,
128                 LLVMValueRef ptr,
129                 LLVMValueRef index)
130 {
131    LLVMBuilderRef builder = gallivm->builder;
132    LLVMValueRef member_ptr, indices[3];
133 
134    indices[0] = lp_build_const_int32(gallivm, 0);
135    indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS);
136    indices[2] = index;
137    member_ptr = LLVMBuildGEP(builder, ptr, indices, ARRAY_SIZE(indices), "");
138    return LLVMBuildLoad(builder, member_ptr, "tag_data");
139 }
140 
141 
142 static void
update_cached_block(struct gallivm_state * gallivm,const struct util_format_description * format_desc,LLVMValueRef ptr_addr,LLVMValueRef hash_index,LLVMValueRef cache)143 update_cached_block(struct gallivm_state *gallivm,
144                     const struct util_format_description *format_desc,
145                     LLVMValueRef ptr_addr,
146                     LLVMValueRef hash_index,
147                     LLVMValueRef cache)
148 
149 {
150    LLVMBuilderRef builder = gallivm->builder;
151    LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
152    LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
153    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
154    LLVMTypeRef i32x4 = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4);
155    LLVMValueRef function;
156    LLVMValueRef tag_value, tmp_ptr;
157    LLVMValueRef col[4];
158    unsigned i, j;
159 
160    /*
161     * Use format_desc->fetch_rgba_8unorm() for each pixel in the block.
162     * This doesn't actually make any sense whatsoever, someone would need
163     * to write a function doing this for all pixels in a block (either as
164     * an external c function or with generated code). Don't ask.
165     */
166 
167    {
168       /*
169        * Function to call looks like:
170        *   fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
171        */
172       LLVMTypeRef ret_type;
173       LLVMTypeRef arg_types[4];
174       LLVMTypeRef function_type;
175 
176       assert(format_desc->fetch_rgba_8unorm);
177 
178       ret_type = LLVMVoidTypeInContext(gallivm->context);
179       arg_types[0] = pi8t;
180       arg_types[1] = pi8t;
181       arg_types[2] = i32t;
182       arg_types[3] = i32t;
183       function_type = LLVMFunctionType(ret_type, arg_types,
184                                        ARRAY_SIZE(arg_types), 0);
185 
186       /* make const pointer for the C fetch_rgba_8unorm function */
187       function = lp_build_const_int_pointer(gallivm,
188          func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));
189 
190       /* cast the callee pointer to the function's type */
191       function = LLVMBuildBitCast(builder, function,
192                                   LLVMPointerType(function_type, 0),
193                                   "cast callee");
194    }
195 
196    tmp_ptr = lp_build_array_alloca(gallivm, i32x4,
197                                    lp_build_const_int32(gallivm, 16),
198                                    "tmp_decode_store");
199    tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
200 
201    /*
202     * Invoke format_desc->fetch_rgba_8unorm() for each pixel.
203     * This is going to be really really slow.
204     * Note: the block store format is actually
205     * x0y0x0y1x0y2x0y3 x1y0x1y1x1y2x1y3 ...
206     */
207    for (i = 0; i < 4; ++i) {
208       for (j = 0; j < 4; ++j) {
209          LLVMValueRef args[4];
210          LLVMValueRef dst_offset = lp_build_const_int32(gallivm, (i * 4 + j) * 4);
211 
212          /*
213           * Note we actually supply a pointer to the start of the block,
214           * not the start of the texture.
215           */
216          args[0] = LLVMBuildGEP(gallivm->builder, tmp_ptr, &dst_offset, 1, "");
217          args[1] = ptr_addr;
218          args[2] = LLVMConstInt(i32t, i, 0);
219          args[3] = LLVMConstInt(i32t, j, 0);
220          LLVMBuildCall(builder, function, args, ARRAY_SIZE(args), "");
221       }
222    }
223 
224    /* Finally store the block - pointless mem copy + update tag. */
225    tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, LLVMPointerType(i32x4, 0), "");
226    for (i = 0; i < 4; ++i) {
227       LLVMValueRef tmp_offset = lp_build_const_int32(gallivm, i);
228       LLVMValueRef ptr = LLVMBuildGEP(gallivm->builder, tmp_ptr, &tmp_offset, 1, "");
229       col[i] = LLVMBuildLoad(builder, ptr, "");
230    }
231 
232    tag_value = LLVMBuildPtrToInt(gallivm->builder, ptr_addr,
233                                  LLVMInt64TypeInContext(gallivm->context), "");
234    store_cached_block(gallivm, col, tag_value, hash_index, cache);
235 }
236 
237 
238 /*
239  * Do a cached lookup.
240  *
241  * Returns (vectors of) 4x8 rgba aos value
242  */
243 LLVMValueRef
lp_build_fetch_cached_texels(struct gallivm_state * gallivm,const struct util_format_description * format_desc,unsigned n,LLVMValueRef base_ptr,LLVMValueRef offset,LLVMValueRef i,LLVMValueRef j,LLVMValueRef cache)244 lp_build_fetch_cached_texels(struct gallivm_state *gallivm,
245                              const struct util_format_description *format_desc,
246                              unsigned n,
247                              LLVMValueRef base_ptr,
248                              LLVMValueRef offset,
249                              LLVMValueRef i,
250                              LLVMValueRef j,
251                              LLVMValueRef cache)
252 
253 {
254    LLVMBuilderRef builder = gallivm->builder;
255    unsigned count, low_bit, log2size;
256    LLVMValueRef color, offset_stored, addr, ptr_addrtrunc, tmp;
257    LLVMValueRef ij_index, hash_index, hash_mask, block_index;
258    LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
259    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
260    LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context);
261    struct lp_type type;
262    struct lp_build_context bld32;
263    memset(&type, 0, sizeof type);
264    type.width = 32;
265    type.length = n;
266 
267    assert(format_desc->block.width == 4);
268    assert(format_desc->block.height == 4);
269 
270    lp_build_context_init(&bld32, gallivm, type);
271 
272    /*
273     * compute hash - we use direct mapped cache, the hash function could
274     *                be better but it needs to be simple
275     * per-element:
276     *    compare offset with offset stored at tag (hash)
277     *    if not equal decode/store block, update tag
278     *    extract color from cache
279     *    assemble result vector
280     */
281 
282    /* TODO: not ideal with 32bit pointers... */
283 
284    low_bit = util_logbase2(format_desc->block.bits / 8);
285    log2size = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE);
286    addr = LLVMBuildPtrToInt(builder, base_ptr, i64t, "");
287    ptr_addrtrunc = LLVMBuildPtrToInt(builder, base_ptr, i32t, "");
288    ptr_addrtrunc = lp_build_broadcast_scalar(&bld32, ptr_addrtrunc);
289    /* For the hash function, first mask off the unused lowest bits. Then just
290       do some xor with address bits - only use lower 32bits */
291    ptr_addrtrunc = LLVMBuildAdd(builder, offset, ptr_addrtrunc, "");
292    ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
293                                  lp_build_const_int_vec(gallivm, type, low_bit), "");
294    /* This only really makes sense for size 64,128,256 */
295    hash_index = ptr_addrtrunc;
296    ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
297                                  lp_build_const_int_vec(gallivm, type, 2*log2size), "");
298    hash_index = LLVMBuildXor(builder, ptr_addrtrunc, hash_index, "");
299    tmp = LLVMBuildLShr(builder, hash_index,
300                        lp_build_const_int_vec(gallivm, type, log2size), "");
301    hash_index = LLVMBuildXor(builder, hash_index, tmp, "");
302 
303    hash_mask = lp_build_const_int_vec(gallivm, type, LP_BUILD_FORMAT_CACHE_SIZE - 1);
304    hash_index = LLVMBuildAnd(builder, hash_index, hash_mask, "");
305    ij_index = LLVMBuildShl(builder, i, lp_build_const_int_vec(gallivm, type, 2), "");
306    ij_index = LLVMBuildAdd(builder, ij_index, j, "");
307    block_index = LLVMBuildShl(builder, hash_index,
308                               lp_build_const_int_vec(gallivm, type, 4), "");
309    block_index = LLVMBuildAdd(builder, ij_index, block_index, "");
310 
311    if (n > 1) {
312       color = LLVMGetUndef(LLVMVectorType(i32t, n));
313       for (count = 0; count < n; count++) {
314          LLVMValueRef index, cond, colorx;
315          LLVMValueRef block_indexx, hash_indexx, addrx, offsetx, ptr_addrx;
316          struct lp_build_if_state if_ctx;
317 
318          index = lp_build_const_int32(gallivm, count);
319          offsetx = LLVMBuildExtractElement(builder, offset, index, "");
320          addrx = LLVMBuildZExt(builder, offsetx, i64t, "");
321          addrx = LLVMBuildAdd(builder, addrx, addr, "");
322          block_indexx = LLVMBuildExtractElement(builder, block_index, index, "");
323          hash_indexx = LLVMBuildLShr(builder, block_indexx,
324                                      lp_build_const_int32(gallivm, 4), "");
325          offset_stored = lookup_tag_data(gallivm, cache, hash_indexx);
326          cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addrx, "");
327 
328          lp_build_if(&if_ctx, gallivm, cond);
329          {
330             ptr_addrx = LLVMBuildIntToPtr(builder, addrx,
331                                           LLVMPointerType(i8t, 0), "");
332             update_cached_block(gallivm, format_desc, ptr_addrx, hash_indexx, cache);
333 #if LP_BUILD_FORMAT_CACHE_DEBUG
334             update_cache_access(gallivm, cache, 1,
335                                 LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
336 #endif
337          }
338          lp_build_endif(&if_ctx);
339 
340          colorx = lookup_cached_pixel(gallivm, cache, block_indexx);
341 
342          color = LLVMBuildInsertElement(builder, color, colorx,
343                                         lp_build_const_int32(gallivm, count), "");
344       }
345    }
346    else {
347       LLVMValueRef cond;
348       struct lp_build_if_state if_ctx;
349 
350       tmp = LLVMBuildZExt(builder, offset, i64t, "");
351       addr = LLVMBuildAdd(builder, tmp, addr, "");
352       offset_stored = lookup_tag_data(gallivm, cache, hash_index);
353       cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addr, "");
354 
355       lp_build_if(&if_ctx, gallivm, cond);
356       {
357          tmp = LLVMBuildIntToPtr(builder, addr, LLVMPointerType(i8t, 0), "");
358          update_cached_block(gallivm, format_desc, tmp, hash_index, cache);
359 #if LP_BUILD_FORMAT_CACHE_DEBUG
360          update_cache_access(gallivm, cache, 1,
361                              LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
362 #endif
363       }
364       lp_build_endif(&if_ctx);
365 
366       color = lookup_cached_pixel(gallivm, cache, block_index);
367    }
368 #if LP_BUILD_FORMAT_CACHE_DEBUG
369    update_cache_access(gallivm, cache, n,
370                        LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL);
371 #endif
372    return LLVMBuildBitCast(builder, color, LLVMVectorType(i8t, n * 4), "");
373 }
374 
375