1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "tu_private.h"
25 
26 #include "util/debug.h"
27 #include "util/disk_cache.h"
28 #include "util/mesa-sha1.h"
29 #include "util/u_atomic.h"
30 #include "vulkan/util/vk_util.h"
31 
/* Per-variant bookkeeping record.
 *
 * The structure currently has no members. entry_size() adds
 * sizeof(struct cache_entry_variant_info) once for every shader stage whose
 * code size is non-zero, so any field added here changes the size computed
 * for cache entries.
 *
 * NOTE(review): a member-less struct is a GNU C extension, not ISO C.
 */
struct cache_entry_variant_info {
};
35 
36 struct cache_entry
37 {
38    union {
39       unsigned char sha1[20];
40       uint32_t sha1_dw[5];
41    };
42    uint32_t code_sizes[MESA_SHADER_STAGES];
43    struct tu_shader_variant *variants[MESA_SHADER_STAGES];
44    char code[0];
45 };
46 
47 static void
tu_pipeline_cache_init(struct tu_pipeline_cache * cache,struct tu_device * device)48 tu_pipeline_cache_init(struct tu_pipeline_cache *cache,
49                        struct tu_device *device)
50 {
51    cache->device = device;
52    pthread_mutex_init(&cache->mutex, NULL);
53 
54    cache->modified = false;
55    cache->kernel_count = 0;
56    cache->total_size = 0;
57    cache->table_size = 1024;
58    const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
59    cache->hash_table = malloc(byte_size);
60 
61    /* We don't consider allocation failure fatal, we just start with a 0-sized
62     * cache. Disable caching when we want to keep shader debug info, since
63     * we don't get the debug info on cached shaders. */
64    if (cache->hash_table == NULL)
65       cache->table_size = 0;
66    else
67       memset(cache->hash_table, 0, byte_size);
68 }
69 
70 static void
tu_pipeline_cache_finish(struct tu_pipeline_cache * cache)71 tu_pipeline_cache_finish(struct tu_pipeline_cache *cache)
72 {
73    for (unsigned i = 0; i < cache->table_size; ++i)
74       if (cache->hash_table[i]) {
75          vk_free(&cache->alloc, cache->hash_table[i]);
76       }
77    pthread_mutex_destroy(&cache->mutex);
78    free(cache->hash_table);
79 }
80 
81 static uint32_t
entry_size(struct cache_entry * entry)82 entry_size(struct cache_entry *entry)
83 {
84    size_t ret = sizeof(*entry);
85    for (int i = 0; i < MESA_SHADER_STAGES; ++i)
86       if (entry->code_sizes[i])
87          ret +=
88             sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
89    return ret;
90 }
91 
92 static struct cache_entry *
tu_pipeline_cache_search_unlocked(struct tu_pipeline_cache * cache,const unsigned char * sha1)93 tu_pipeline_cache_search_unlocked(struct tu_pipeline_cache *cache,
94                                   const unsigned char *sha1)
95 {
96    const uint32_t mask = cache->table_size - 1;
97    const uint32_t start = (*(uint32_t *) sha1);
98 
99    if (cache->table_size == 0)
100       return NULL;
101 
102    for (uint32_t i = 0; i < cache->table_size; i++) {
103       const uint32_t index = (start + i) & mask;
104       struct cache_entry *entry = cache->hash_table[index];
105 
106       if (!entry)
107          return NULL;
108 
109       if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
110          return entry;
111       }
112    }
113 
114    unreachable("hash table should never be full");
115 }
116 
117 static struct cache_entry *
tu_pipeline_cache_search(struct tu_pipeline_cache * cache,const unsigned char * sha1)118 tu_pipeline_cache_search(struct tu_pipeline_cache *cache,
119                          const unsigned char *sha1)
120 {
121    struct cache_entry *entry;
122 
123    pthread_mutex_lock(&cache->mutex);
124 
125    entry = tu_pipeline_cache_search_unlocked(cache, sha1);
126 
127    pthread_mutex_unlock(&cache->mutex);
128 
129    return entry;
130 }
131 
132 static void
tu_pipeline_cache_set_entry(struct tu_pipeline_cache * cache,struct cache_entry * entry)133 tu_pipeline_cache_set_entry(struct tu_pipeline_cache *cache,
134                             struct cache_entry *entry)
135 {
136    const uint32_t mask = cache->table_size - 1;
137    const uint32_t start = entry->sha1_dw[0];
138 
139    /* We'll always be able to insert when we get here. */
140    assert(cache->kernel_count < cache->table_size / 2);
141 
142    for (uint32_t i = 0; i < cache->table_size; i++) {
143       const uint32_t index = (start + i) & mask;
144       if (!cache->hash_table[index]) {
145          cache->hash_table[index] = entry;
146          break;
147       }
148    }
149 
150    cache->total_size += entry_size(entry);
151    cache->kernel_count++;
152 }
153 
154 static VkResult
tu_pipeline_cache_grow(struct tu_pipeline_cache * cache)155 tu_pipeline_cache_grow(struct tu_pipeline_cache *cache)
156 {
157    const uint32_t table_size = cache->table_size * 2;
158    const uint32_t old_table_size = cache->table_size;
159    const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
160    struct cache_entry **table;
161    struct cache_entry **old_table = cache->hash_table;
162 
163    table = malloc(byte_size);
164    if (table == NULL)
165       return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
166 
167    cache->hash_table = table;
168    cache->table_size = table_size;
169    cache->kernel_count = 0;
170    cache->total_size = 0;
171 
172    memset(cache->hash_table, 0, byte_size);
173    for (uint32_t i = 0; i < old_table_size; i++) {
174       struct cache_entry *entry = old_table[i];
175       if (!entry)
176          continue;
177 
178       tu_pipeline_cache_set_entry(cache, entry);
179    }
180 
181    free(old_table);
182 
183    return VK_SUCCESS;
184 }
185 
186 static void
tu_pipeline_cache_add_entry(struct tu_pipeline_cache * cache,struct cache_entry * entry)187 tu_pipeline_cache_add_entry(struct tu_pipeline_cache *cache,
188                             struct cache_entry *entry)
189 {
190    if (cache->kernel_count == cache->table_size / 2)
191       tu_pipeline_cache_grow(cache);
192 
193    /* Failing to grow that hash table isn't fatal, but may mean we don't
194     * have enough space to add this new kernel. Only add it if there's room.
195     */
196    if (cache->kernel_count < cache->table_size / 2)
197       tu_pipeline_cache_set_entry(cache, entry);
198 }
199 
200 static void
tu_pipeline_cache_load(struct tu_pipeline_cache * cache,const void * data,size_t size)201 tu_pipeline_cache_load(struct tu_pipeline_cache *cache,
202                        const void *data,
203                        size_t size)
204 {
205    struct tu_device *device = cache->device;
206    struct vk_pipeline_cache_header header;
207 
208    if (size < sizeof(header))
209       return;
210    memcpy(&header, data, sizeof(header));
211    if (header.header_size < sizeof(header))
212       return;
213    if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
214       return;
215    if (header.vendor_id != 0 /* TODO */)
216       return;
217    if (header.device_id != 0 /* TODO */)
218       return;
219    if (memcmp(header.uuid, device->physical_device->cache_uuid,
220               VK_UUID_SIZE) != 0)
221       return;
222 
223    char *end = (void *) data + size;
224    char *p = (void *) data + header.header_size;
225 
226    while (end - p >= sizeof(struct cache_entry)) {
227       struct cache_entry *entry = (struct cache_entry *) p;
228       struct cache_entry *dest_entry;
229       size_t size = entry_size(entry);
230       if (end - p < size)
231          break;
232 
233       dest_entry =
234          vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
235       if (dest_entry) {
236          memcpy(dest_entry, entry, size);
237          for (int i = 0; i < MESA_SHADER_STAGES; ++i)
238             dest_entry->variants[i] = NULL;
239          tu_pipeline_cache_add_entry(cache, dest_entry);
240       }
241       p += size;
242    }
243 }
244 
245 VkResult
tu_CreatePipelineCache(VkDevice _device,const VkPipelineCacheCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkPipelineCache * pPipelineCache)246 tu_CreatePipelineCache(VkDevice _device,
247                        const VkPipelineCacheCreateInfo *pCreateInfo,
248                        const VkAllocationCallbacks *pAllocator,
249                        VkPipelineCache *pPipelineCache)
250 {
251    TU_FROM_HANDLE(tu_device, device, _device);
252    struct tu_pipeline_cache *cache;
253 
254    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
255    assert(pCreateInfo->flags == 0);
256 
257    cache = vk_object_alloc(&device->vk, pAllocator, sizeof(*cache),
258                            VK_OBJECT_TYPE_PIPELINE_CACHE);
259    if (cache == NULL)
260       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
261 
262    if (pAllocator)
263       cache->alloc = *pAllocator;
264    else
265       cache->alloc = device->vk.alloc;
266 
267    tu_pipeline_cache_init(cache, device);
268 
269    if (pCreateInfo->initialDataSize > 0) {
270       tu_pipeline_cache_load(cache, pCreateInfo->pInitialData,
271                              pCreateInfo->initialDataSize);
272    }
273 
274    *pPipelineCache = tu_pipeline_cache_to_handle(cache);
275 
276    return VK_SUCCESS;
277 }
278 
279 void
tu_DestroyPipelineCache(VkDevice _device,VkPipelineCache _cache,const VkAllocationCallbacks * pAllocator)280 tu_DestroyPipelineCache(VkDevice _device,
281                         VkPipelineCache _cache,
282                         const VkAllocationCallbacks *pAllocator)
283 {
284    TU_FROM_HANDLE(tu_device, device, _device);
285    TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);
286 
287    if (!cache)
288       return;
289    tu_pipeline_cache_finish(cache);
290 
291    vk_object_free(&device->vk, pAllocator, cache);
292 }
293 
294 VkResult
tu_GetPipelineCacheData(VkDevice _device,VkPipelineCache _cache,size_t * pDataSize,void * pData)295 tu_GetPipelineCacheData(VkDevice _device,
296                         VkPipelineCache _cache,
297                         size_t *pDataSize,
298                         void *pData)
299 {
300    TU_FROM_HANDLE(tu_device, device, _device);
301    TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);
302    struct vk_pipeline_cache_header *header;
303    VkResult result = VK_SUCCESS;
304 
305    pthread_mutex_lock(&cache->mutex);
306 
307    const size_t size = sizeof(*header) + cache->total_size;
308    if (pData == NULL) {
309       pthread_mutex_unlock(&cache->mutex);
310       *pDataSize = size;
311       return VK_SUCCESS;
312    }
313    if (*pDataSize < sizeof(*header)) {
314       pthread_mutex_unlock(&cache->mutex);
315       *pDataSize = 0;
316       return VK_INCOMPLETE;
317    }
318    void *p = pData, *end = pData + *pDataSize;
319    header = p;
320    header->header_size = sizeof(*header);
321    header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
322    header->vendor_id = 0 /* TODO */;
323    header->device_id = 0 /* TODO */;
324    memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
325    p += header->header_size;
326 
327    struct cache_entry *entry;
328    for (uint32_t i = 0; i < cache->table_size; i++) {
329       if (!cache->hash_table[i])
330          continue;
331       entry = cache->hash_table[i];
332       const uint32_t size = entry_size(entry);
333       if (end < p + size) {
334          result = VK_INCOMPLETE;
335          break;
336       }
337 
338       memcpy(p, entry, size);
339       for (int j = 0; j < MESA_SHADER_STAGES; ++j)
340          ((struct cache_entry *) p)->variants[j] = NULL;
341       p += size;
342    }
343    *pDataSize = p - pData;
344 
345    pthread_mutex_unlock(&cache->mutex);
346    return result;
347 }
348 
349 static void
tu_pipeline_cache_merge(struct tu_pipeline_cache * dst,struct tu_pipeline_cache * src)350 tu_pipeline_cache_merge(struct tu_pipeline_cache *dst,
351                         struct tu_pipeline_cache *src)
352 {
353    for (uint32_t i = 0; i < src->table_size; i++) {
354       struct cache_entry *entry = src->hash_table[i];
355       if (!entry || tu_pipeline_cache_search(dst, entry->sha1))
356          continue;
357 
358       tu_pipeline_cache_add_entry(dst, entry);
359 
360       src->hash_table[i] = NULL;
361    }
362 }
363 
364 VkResult
tu_MergePipelineCaches(VkDevice _device,VkPipelineCache destCache,uint32_t srcCacheCount,const VkPipelineCache * pSrcCaches)365 tu_MergePipelineCaches(VkDevice _device,
366                        VkPipelineCache destCache,
367                        uint32_t srcCacheCount,
368                        const VkPipelineCache *pSrcCaches)
369 {
370    TU_FROM_HANDLE(tu_pipeline_cache, dst, destCache);
371 
372    for (uint32_t i = 0; i < srcCacheCount; i++) {
373       TU_FROM_HANDLE(tu_pipeline_cache, src, pSrcCaches[i]);
374 
375       tu_pipeline_cache_merge(dst, src);
376    }
377 
378    return VK_SUCCESS;
379 }
380