1 /*
2  * Copyright (C) 2020 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors (Collabora):
24  *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  */
26 
27 #include "bit.h"
28 #include "panfrost/lib/decode.h"
29 #include "drm-uapi/panfrost_drm.h"
30 #include "panfrost/lib/pan_encoder.h"
31 
32 /* Standalone compiler tests submitting jobs directly to the hardware. Uses the
33  * `bit` prefix for `BIfrost Tests` and because bit sounds wicked cool. */
34 
35 static struct panfrost_bo *
bit_bo_create(struct panfrost_device * dev,size_t size)36 bit_bo_create(struct panfrost_device *dev, size_t size)
37 {
38         struct panfrost_bo *bo = panfrost_bo_create(dev, size, PAN_BO_EXECUTE);
39         pandecode_inject_mmap(bo->ptr.gpu, bo->ptr.cpu, bo->size, NULL);
40         return bo;
41 }
42 
43 struct panfrost_device *
bit_initialize(void * memctx)44 bit_initialize(void *memctx)
45 {
46         int fd = drmOpenWithType("panfrost", NULL, DRM_NODE_RENDER);
47 
48         if (fd < 0)
49                 unreachable("No panfrost device found. Try chmod?");
50 
51         struct panfrost_device *dev = rzalloc(memctx, struct panfrost_device);
52         panfrost_open_device(memctx, fd, dev);
53 
54         pandecode_initialize(true);
55         printf("%X\n", dev->gpu_id);
56 
57         return dev;
58 }
59 
60 static bool
bit_submit(struct panfrost_device * dev,enum mali_job_type T,void * payload,size_t payload_size,struct panfrost_bo ** bos,size_t bo_count,enum bit_debug debug)61 bit_submit(struct panfrost_device *dev,
62                 enum mali_job_type T,
63                 void *payload, size_t payload_size,
64                 struct panfrost_bo **bos, size_t bo_count, enum bit_debug debug)
65 {
66         struct panfrost_bo *job = bit_bo_create(dev, 4096);
67         pan_pack(job->ptr.cpu, JOB_HEADER, cfg) {
68                 cfg.type = T;
69                 cfg.index = 1;
70         }
71         memcpy(job->ptr.cpu + MALI_JOB_HEADER_LENGTH, payload, payload_size);
72 
73         uint32_t *bo_handles = calloc(sizeof(uint32_t), bo_count);
74 
75         for (unsigned i = 0; i < bo_count; ++i)
76                 bo_handles[i] = bos[i]->gem_handle;
77 
78         uint32_t syncobj = 0;
79         int ret = 0;
80 
81         ret = drmSyncobjCreate(dev->fd, DRM_SYNCOBJ_CREATE_SIGNALED, &syncobj);
82         assert(!ret);
83 
84         struct drm_panfrost_submit submit = {
85                 .jc = job->ptr.gpu,
86                 .bo_handles = (uintptr_t) bo_handles,
87                 .bo_handle_count = bo_count,
88                 .out_sync = syncobj,
89         };
90 
91         ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
92         assert(!ret);
93         free(bo_handles);
94 
95         drmSyncobjWait(dev->fd, &syncobj, 1, INT64_MAX, 0, NULL);
96         if (debug >= BIT_DEBUG_ALL)
97                 pandecode_jc(submit.jc, true, dev->gpu_id, false);
98         return true;
99 }
100 
101 /* Checks that the device is alive and responding to basic jobs as a sanity
102  * check - prerequisite to running code on the device. We test this via a
103  * WRITE_VALUE job */
104 
105 bool
bit_sanity_check(struct panfrost_device * dev)106 bit_sanity_check(struct panfrost_device *dev)
107 {
108         struct panfrost_bo *scratch = bit_bo_create(dev, 65536);
109         ((uint32_t *) scratch->ptr.cpu)[0] = 0xAA;
110 
111         struct mali_write_value_job_payload_packed payload;
112 
113         pan_pack(&payload, WRITE_VALUE_JOB_PAYLOAD, cfg) {
114                 cfg.address = scratch->ptr.gpu;
115                 cfg.type = MALI_WRITE_VALUE_TYPE_ZERO;
116         };
117 
118         struct panfrost_bo *bos[] = { scratch };
119         bool success = bit_submit(dev, MALI_JOB_TYPE_WRITE_VALUE,
120                         &payload, sizeof(payload), bos, 1, false);
121 
122         return success && (((uint8_t *) scratch->ptr.cpu)[0] == 0x0);
123 }
124 
125 /* Constructs a vertex job */
126 
127 bool
bit_vertex(struct panfrost_device * dev,panfrost_program * prog,uint32_t * iubo,size_t sz_ubo,uint32_t * iattr,size_t sz_attr,uint32_t * expected,size_t sz_expected,enum bit_debug debug)128 bit_vertex(struct panfrost_device *dev, panfrost_program *prog,
129                 uint32_t *iubo, size_t sz_ubo,
130                 uint32_t *iattr, size_t sz_attr,
131                 uint32_t *expected, size_t sz_expected, enum bit_debug debug)
132 {
133         struct panfrost_bo *shader = bit_bo_create(dev, prog->compiled.size);
134         struct panfrost_bo *shader_desc = bit_bo_create(dev, 4096);
135         struct panfrost_bo *ubo = bit_bo_create(dev, 4096);
136         struct panfrost_bo *var = bit_bo_create(dev, 4096);
137         struct panfrost_bo *attr = bit_bo_create(dev, 4096);
138 
139         pan_pack(attr->ptr.cpu, ATTRIBUTE, cfg) {
140                 cfg.format = (MALI_RGBA32UI << 12);
141                 cfg.offset_enable = true;
142         }
143 
144         pan_pack(var->ptr.cpu, ATTRIBUTE, cfg) {
145                 cfg.format = (MALI_RGBA32UI << 12);
146                 cfg.offset_enable = false;
147         }
148 
149         pan_pack(var->ptr.cpu + 256, ATTRIBUTE_BUFFER, cfg) {
150                 cfg.pointer = (var->ptr.gpu + 1024);
151                 cfg.size = 1024;
152         }
153 
154         pan_pack(attr->ptr.cpu + 256, ATTRIBUTE_BUFFER, cfg) {
155                 cfg.pointer = (attr->ptr.gpu + 1024);
156                 cfg.size = 1024;
157         }
158 
159         pan_pack(ubo->ptr.cpu, UNIFORM_BUFFER, cfg) {
160                 cfg.entries = sz_ubo / 16;
161                 cfg.pointer = ubo->ptr.gpu + 1024;
162         }
163 
164         if (sz_ubo)
165                 memcpy(ubo->ptr.cpu + 1024, iubo, sz_ubo);
166 
167         if (sz_attr)
168                 memcpy(attr->ptr.cpu + 1024, iattr, sz_attr);
169 
170         struct panfrost_bo *shmem = bit_bo_create(dev, 4096);
171 
172         pan_pack(shmem->ptr.cpu, LOCAL_STORAGE, cfg) {
173                 cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
174         }
175 
176         pan_pack(shader_desc->ptr.cpu, RENDERER_STATE, cfg) {
177                 cfg.shader.shader = shader->ptr.gpu;
178                 cfg.shader.attribute_count = cfg.shader.varying_count = 1;
179                 cfg.properties.uniform_buffer_count = 1;
180                 cfg.properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
181                 cfg.preload.vertex.vertex_id = true;
182                 cfg.preload.vertex.instance_id = true;
183                 cfg.preload.uniform_count = (sz_ubo / 16);
184         }
185 
186         memcpy(shader->ptr.cpu, prog->compiled.data, prog->compiled.size);
187 
188         struct mali_compute_job_packed job;
189 
190         pan_section_pack(&job, COMPUTE_JOB, PARAMETERS, cfg) {
191                 cfg.job_task_split = 5;
192         }
193 
194         pan_section_pack(&job, COMPUTE_JOB, DRAW, cfg) {
195                 cfg.draw_descriptor_is_64b = true;
196                 cfg.thread_storage = shmem->ptr.gpu;
197                 cfg.state = shader_desc->ptr.gpu;
198                 cfg.push_uniforms = ubo->ptr.gpu + 1024;
199                 cfg.uniform_buffers = ubo->ptr.gpu;
200                 cfg.attributes = attr->ptr.gpu;
201                 cfg.attribute_buffers = attr->ptr.gpu + 256;
202                 cfg.varyings = var->ptr.gpu;
203                 cfg.varying_buffers = var->ptr.gpu + 256;
204         }
205 
206         void *invocation = pan_section_ptr(&job, COMPUTE_JOB, INVOCATION);
207         panfrost_pack_work_groups_compute(invocation,
208                                           1, 1, 1,
209                                           1, 1, 1,
210                                           true);
211 
212         struct panfrost_bo *bos[] = {
213                 shmem, shader, shader_desc, ubo, var, attr
214         };
215 
216         bool succ = bit_submit(dev, MALI_JOB_TYPE_VERTEX,
217                                ((void *)&job) + MALI_JOB_HEADER_LENGTH,
218                                MALI_COMPUTE_JOB_LENGTH - MALI_JOB_HEADER_LENGTH,
219                                bos, ARRAY_SIZE(bos), debug);
220 
221         /* Check the output varyings */
222 
223         uint32_t *output = (uint32_t *) (var->ptr.cpu + 1024);
224         float *foutput = (float *) output;
225         float *fexpected = (float *) expected;
226 
227         if (sz_expected) {
228                 unsigned comp = memcmp(output, expected, sz_expected);
229                 succ &= (comp == 0);
230 
231                 if (comp && (debug >= BIT_DEBUG_FAIL)) {
232                         fprintf(stderr, "expected [");
233 
234                         for (unsigned i = 0; i < (sz_expected >> 2); ++i)
235                                 fprintf(stderr, "%08X /* %f */ ", expected[i], fexpected[i]);
236 
237                         fprintf(stderr, "], got [");
238 
239                         for (unsigned i = 0; i < (sz_expected >> 2); ++i)
240                                 fprintf(stderr, "%08X /* %f */ ", output[i], foutput[i]);
241 
242                         fprintf(stderr, "\n");
243                 }
244         } else if (debug == BIT_DEBUG_ALL) {
245                 fprintf(stderr, "got [");
246 
247                 for (unsigned i = 0; i < 4; ++i)
248                         fprintf(stderr, "%08X /* %f */ ", output[i], foutput[i]);
249 
250                 fprintf(stderr, "\n");
251         }
252 
253         return succ;
254 }
255