1 /*
2  * Mesa 3-D graphics library
3  *
4  * Copyright 2003 VMware, Inc.
5  * Copyright 2009 VMware, Inc.
6  * All Rights Reserved.
7  * Copyright (C) 2016 Advanced Micro Devices, Inc.
8  *
9  * Permission is hereby granted, free of charge, to any person obtaining a
10  * copy of this software and associated documentation files (the "Software"),
11  * to deal in the Software without restriction, including without limitation
12  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13  * and/or sell copies of the Software, and to permit persons to whom the
14  * Software is furnished to do so, subject to the following conditions:
15  *
16  * The above copyright notice and this permission notice (including the next
17  * paragraph) shall be included in all copies or substantial portions of the
18  * Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
23  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
24  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
25  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
26  * USE OR OTHER DEALINGS IN THE SOFTWARE.
27  */
28 
29 #include "main/glheader.h"
30 #include "main/context.h"
31 #include "main/varray.h"
32 #include "main/macros.h"
33 #include "main/sse_minmax.h"
34 #include "x86/common_x86_asm.h"
35 #include "util/hash_table.h"
36 
37 
38 struct minmax_cache_key {
39    GLintptr offset;
40    GLuint count;
41    GLenum type;
42 };
43 
44 
45 struct minmax_cache_entry {
46    struct minmax_cache_key key;
47    GLuint min;
48    GLuint max;
49 };
50 
51 
52 static uint32_t
vbo_minmax_cache_hash(const struct minmax_cache_key * key)53 vbo_minmax_cache_hash(const struct minmax_cache_key *key)
54 {
55    return _mesa_hash_data(key, sizeof(*key));
56 }
57 
58 
59 static bool
vbo_minmax_cache_key_equal(const struct minmax_cache_key * a,const struct minmax_cache_key * b)60 vbo_minmax_cache_key_equal(const struct minmax_cache_key *a,
61                            const struct minmax_cache_key *b)
62 {
63    return (a->offset == b->offset) && (a->count == b->count) && (a->type == b->type);
64 }
65 
66 
67 static void
vbo_minmax_cache_delete_entry(struct hash_entry * entry)68 vbo_minmax_cache_delete_entry(struct hash_entry *entry)
69 {
70    free(entry->data);
71 }
72 
73 
74 static GLboolean
vbo_use_minmax_cache(struct gl_buffer_object * bufferObj)75 vbo_use_minmax_cache(struct gl_buffer_object *bufferObj)
76 {
77    if (bufferObj->UsageHistory & (USAGE_TEXTURE_BUFFER |
78                                   USAGE_ATOMIC_COUNTER_BUFFER |
79                                   USAGE_SHADER_STORAGE_BUFFER |
80                                   USAGE_TRANSFORM_FEEDBACK_BUFFER |
81                                   USAGE_PIXEL_PACK_BUFFER |
82                                   USAGE_DISABLE_MINMAX_CACHE))
83       return GL_FALSE;
84 
85    if ((bufferObj->Mappings[MAP_USER].AccessFlags &
86         (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT)) ==
87        (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT))
88       return GL_FALSE;
89 
90    return GL_TRUE;
91 }
92 
93 
94 void
vbo_delete_minmax_cache(struct gl_buffer_object * bufferObj)95 vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj)
96 {
97    _mesa_hash_table_destroy(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
98    bufferObj->MinMaxCache = NULL;
99 }
100 
101 
102 static GLboolean
vbo_get_minmax_cached(struct gl_buffer_object * bufferObj,GLenum type,GLintptr offset,GLuint count,GLuint * min_index,GLuint * max_index)103 vbo_get_minmax_cached(struct gl_buffer_object *bufferObj,
104                       GLenum type, GLintptr offset, GLuint count,
105                       GLuint *min_index, GLuint *max_index)
106 {
107    GLboolean found = GL_FALSE;
108    struct minmax_cache_key key;
109    uint32_t hash;
110    struct hash_entry *result;
111 
112    if (!bufferObj->MinMaxCache)
113       return GL_FALSE;
114    if (!vbo_use_minmax_cache(bufferObj))
115       return GL_FALSE;
116 
117    mtx_lock(&bufferObj->Mutex);
118 
119    if (bufferObj->MinMaxCacheDirty) {
120       /* Disable the cache permanently for this BO if the number of hits
121        * is asymptotically less than the number of misses. This happens when
122        * applications use the BO for streaming.
123        *
124        * However, some initial optimism allows applications that interleave
125        * draw calls with glBufferSubData during warmup.
126        */
127       unsigned optimism = bufferObj->Size;
128       if (bufferObj->MinMaxCacheMissIndices > optimism &&
129           bufferObj->MinMaxCacheHitIndices < bufferObj->MinMaxCacheMissIndices - optimism) {
130          bufferObj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE;
131          vbo_delete_minmax_cache(bufferObj);
132          goto out_disable;
133       }
134 
135       _mesa_hash_table_clear(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
136       bufferObj->MinMaxCacheDirty = false;
137       goto out_invalidate;
138    }
139 
140    key.type = type;
141    key.offset = offset;
142    key.count = count;
143    hash = vbo_minmax_cache_hash(&key);
144    result = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache, hash, &key);
145    if (result) {
146       struct minmax_cache_entry *entry = result->data;
147       *min_index = entry->min;
148       *max_index = entry->max;
149       found = GL_TRUE;
150    }
151 
152 out_invalidate:
153    if (found) {
154       /* The hit counter saturates so that we don't accidently disable the
155        * cache in a long-running program.
156        */
157       unsigned new_hit_count = bufferObj->MinMaxCacheHitIndices + count;
158 
159       if (new_hit_count >= bufferObj->MinMaxCacheHitIndices)
160          bufferObj->MinMaxCacheHitIndices = new_hit_count;
161       else
162          bufferObj->MinMaxCacheHitIndices = ~(unsigned)0;
163    } else {
164       bufferObj->MinMaxCacheMissIndices += count;
165    }
166 
167 out_disable:
168    mtx_unlock(&bufferObj->Mutex);
169    return found;
170 }
171 
172 
173 static void
vbo_minmax_cache_store(struct gl_context * ctx,struct gl_buffer_object * bufferObj,GLenum type,GLintptr offset,GLuint count,GLuint min,GLuint max)174 vbo_minmax_cache_store(struct gl_context *ctx,
175                        struct gl_buffer_object *bufferObj,
176                        GLenum type, GLintptr offset, GLuint count,
177                        GLuint min, GLuint max)
178 {
179    struct minmax_cache_entry *entry;
180    struct hash_entry *table_entry;
181    uint32_t hash;
182 
183    if (!vbo_use_minmax_cache(bufferObj))
184       return;
185 
186    mtx_lock(&bufferObj->Mutex);
187 
188    if (!bufferObj->MinMaxCache) {
189       bufferObj->MinMaxCache =
190          _mesa_hash_table_create(NULL,
191                                  (uint32_t (*)(const void *))vbo_minmax_cache_hash,
192                                  (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal);
193       if (!bufferObj->MinMaxCache)
194          goto out;
195    }
196 
197    entry = MALLOC_STRUCT(minmax_cache_entry);
198    if (!entry)
199       goto out;
200 
201    entry->key.offset = offset;
202    entry->key.count = count;
203    entry->key.type = type;
204    entry->min = min;
205    entry->max = max;
206    hash = vbo_minmax_cache_hash(&entry->key);
207 
208    table_entry = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,
209                                                     hash, &entry->key);
210    if (table_entry) {
211       /* It seems like this could happen when two contexts are rendering using
212        * the same buffer object from multiple threads.
213        */
214       _mesa_debug(ctx, "duplicate entry in minmax cache\n");
215       free(entry);
216       goto out;
217    }
218 
219    table_entry = _mesa_hash_table_insert_pre_hashed(bufferObj->MinMaxCache,
220                                                     hash, &entry->key, entry);
221    if (!table_entry)
222       free(entry);
223 
224 out:
225    mtx_unlock(&bufferObj->Mutex);
226 }
227 
228 
229 /**
230  * Compute min and max elements by scanning the index buffer for
231  * glDraw[Range]Elements() calls.
232  * If primitive restart is enabled, we need to ignore restart
233  * indexes when computing min/max.
234  */
235 static void
vbo_get_minmax_index(struct gl_context * ctx,const struct _mesa_prim * prim,const struct _mesa_index_buffer * ib,GLuint * min_index,GLuint * max_index,const GLuint count)236 vbo_get_minmax_index(struct gl_context *ctx,
237                      const struct _mesa_prim *prim,
238                      const struct _mesa_index_buffer *ib,
239                      GLuint *min_index, GLuint *max_index,
240                      const GLuint count)
241 {
242    const GLboolean restart = ctx->Array._PrimitiveRestart;
243    const GLuint restartIndex = _mesa_primitive_restart_index(ctx, ib->type);
244    const int index_size = vbo_sizeof_ib_type(ib->type);
245    const char *indices;
246    GLuint i;
247 
248    indices = (char *) ib->ptr + prim->start * index_size;
249    if (_mesa_is_bufferobj(ib->obj)) {
250       GLsizeiptr size = MIN2(count * index_size, ib->obj->Size);
251 
252       if (vbo_get_minmax_cached(ib->obj, ib->type, (GLintptr) indices, count,
253                                 min_index, max_index))
254          return;
255 
256       indices = ctx->Driver.MapBufferRange(ctx, (GLintptr) indices, size,
257                                            GL_MAP_READ_BIT, ib->obj,
258                                            MAP_INTERNAL);
259    }
260 
261    switch (ib->type) {
262    case GL_UNSIGNED_INT: {
263       const GLuint *ui_indices = (const GLuint *)indices;
264       GLuint max_ui = 0;
265       GLuint min_ui = ~0U;
266       if (restart) {
267          for (i = 0; i < count; i++) {
268             if (ui_indices[i] != restartIndex) {
269                if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
270                if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
271             }
272          }
273       }
274       else {
275 #if defined(USE_SSE41)
276          if (cpu_has_sse4_1) {
277             _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);
278          }
279          else
280 #endif
281             for (i = 0; i < count; i++) {
282                if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
283                if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
284             }
285       }
286       *min_index = min_ui;
287       *max_index = max_ui;
288       break;
289    }
290    case GL_UNSIGNED_SHORT: {
291       const GLushort *us_indices = (const GLushort *)indices;
292       GLuint max_us = 0;
293       GLuint min_us = ~0U;
294       if (restart) {
295          for (i = 0; i < count; i++) {
296             if (us_indices[i] != restartIndex) {
297                if (us_indices[i] > max_us) max_us = us_indices[i];
298                if (us_indices[i] < min_us) min_us = us_indices[i];
299             }
300          }
301       }
302       else {
303          for (i = 0; i < count; i++) {
304             if (us_indices[i] > max_us) max_us = us_indices[i];
305             if (us_indices[i] < min_us) min_us = us_indices[i];
306          }
307       }
308       *min_index = min_us;
309       *max_index = max_us;
310       break;
311    }
312    case GL_UNSIGNED_BYTE: {
313       const GLubyte *ub_indices = (const GLubyte *)indices;
314       GLuint max_ub = 0;
315       GLuint min_ub = ~0U;
316       if (restart) {
317          for (i = 0; i < count; i++) {
318             if (ub_indices[i] != restartIndex) {
319                if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
320                if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
321             }
322          }
323       }
324       else {
325          for (i = 0; i < count; i++) {
326             if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
327             if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
328          }
329       }
330       *min_index = min_ub;
331       *max_index = max_ub;
332       break;
333    }
334    default:
335       unreachable("not reached");
336    }
337 
338    if (_mesa_is_bufferobj(ib->obj)) {
339       vbo_minmax_cache_store(ctx, ib->obj, ib->type, prim->start, count,
340                              *min_index, *max_index);
341       ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);
342    }
343 }
344 
345 /**
346  * Compute min and max elements for nr_prims
347  */
348 void
vbo_get_minmax_indices(struct gl_context * ctx,const struct _mesa_prim * prims,const struct _mesa_index_buffer * ib,GLuint * min_index,GLuint * max_index,GLuint nr_prims)349 vbo_get_minmax_indices(struct gl_context *ctx,
350                        const struct _mesa_prim *prims,
351                        const struct _mesa_index_buffer *ib,
352                        GLuint *min_index,
353                        GLuint *max_index,
354                        GLuint nr_prims)
355 {
356    GLuint tmp_min, tmp_max;
357    GLuint i;
358    GLuint count;
359 
360    *min_index = ~0;
361    *max_index = 0;
362 
363    for (i = 0; i < nr_prims; i++) {
364       const struct _mesa_prim *start_prim;
365 
366       start_prim = &prims[i];
367       count = start_prim->count;
368       /* Do combination if possible to reduce map/unmap count */
369       while ((i + 1 < nr_prims) &&
370              (prims[i].start + prims[i].count == prims[i+1].start)) {
371          count += prims[i+1].count;
372          i++;
373       }
374       vbo_get_minmax_index(ctx, start_prim, ib, &tmp_min, &tmp_max, count);
375       *min_index = MIN2(*min_index, tmp_min);
376       *max_index = MAX2(*max_index, tmp_max);
377    }
378 }
379