1 /*
2  * Copyright (C) 2019 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors (Collabora):
24  *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  *
26  */
27 
28 #include <assert.h>
29 #include "util/u_math.h"
30 #include "pan_encoder.h"
31 
32 /* Compute shaders are invoked with a gl_NumWorkGroups X/Y/Z triplet. Vertex
33  * shaders, it turns out, are invoked with the same mechanism, with the triplet
34  * (1, vertex_count, instance_count).
35  *
36  * Alongside this triplet is the gl_WorkGroupSize X/Y/Z triplet.
37  *
 * Unfortunately, the packing for these triplets into the
 * mali_vertex_tiler_prefix is a little funky, using a dynamic bitfield. The
 * routines here exist to pack them */
41 
42 void
panfrost_pack_work_groups_compute(struct mali_invocation_packed * out,unsigned num_x,unsigned num_y,unsigned num_z,unsigned size_x,unsigned size_y,unsigned size_z,bool quirk_graphics)43 panfrost_pack_work_groups_compute(
44         struct mali_invocation_packed *out,
45         unsigned num_x,
46         unsigned num_y,
47         unsigned num_z,
48         unsigned size_x,
49         unsigned size_y,
50         unsigned size_z,
51         bool quirk_graphics)
52 {
53         uint32_t packed = 0;
54 
55         /* The values needing packing, in order, and the corresponding shifts.
56          * Indicies into shift are off-by-one to make the logic easier */
57 
58         unsigned shifts[7] = { 0 };
59 
60         unsigned values[6] = {
61                 MALI_POSITIVE(size_x),
62                 MALI_POSITIVE(size_y),
63                 MALI_POSITIVE(size_z),
64                 MALI_POSITIVE(num_x),
65                 MALI_POSITIVE(num_y),
66                 MALI_POSITIVE(num_z),
67         };
68 
69         for (unsigned i = 0; i < 6; ++i) {
70                 /* OR it in, shifting as required */
71                 packed |= (values[i] << shifts[i]);
72 
73                 /* How many bits did we use? */
74                 unsigned bit_count = util_logbase2_ceil(values[i] + 1);
75 
76                 /* Set the next shift accordingly */
77                 shifts[i + 1] = shifts[i] + bit_count;
78         }
79 
80         pan_pack(out, INVOCATION, cfg) {
81                 cfg.invocations = packed;
82                 cfg.size_y_shift = shifts[1];
83                 cfg.size_z_shift = shifts[2];
84                 cfg.workgroups_x_shift = shifts[3];
85                 cfg.workgroups_y_shift = shifts[4];
86                 cfg.workgroups_z_shift = shifts[5];
87 
88                 /* Quirk: for non-instanced graphics, the blob sets
89                  * workgroups_z_shift = 32. This doesn't appear to matter to
90                  * the hardware, but it's good to be bit-identical. */
91 
92                 if (quirk_graphics && (num_z <= 1))
93                         cfg.workgroups_z_shift = 32;
94 
95                 /* Quirk: for graphics, >= 2.  For compute, 2 without barriers
96                  * but equal to workgroups_x_shift with barriers */
97 
98                 cfg.unknown_shift = quirk_graphics ? 2 : cfg.workgroups_x_shift;
99         }
100 }
101