1 /*
2  * Copyright (C) 2011 Francisco Jerez.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining
6  * a copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sublicense, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the
14  * next paragraph) shall be included in all copies or substantial
15  * portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  *
25  */
26 
27 #include <fcntl.h>
28 #include <stdio.h>
29 #include <sys/stat.h>
30 #include <inttypes.h>
31 #include "pipe/p_state.h"
32 #include "pipe/p_context.h"
33 #include "pipe/p_screen.h"
34 #include "pipe/p_defines.h"
35 #include "pipe/p_shader_tokens.h"
36 #include "util/u_memory.h"
37 #include "util/u_inlines.h"
38 #include "util/u_sampler.h"
39 #include "util/u_format.h"
40 #include "tgsi/tgsi_text.h"
41 #include "pipe-loader/pipe_loader.h"
42 
43 #define MAX_RESOURCES 4
44 
/*
 * State shared by the compute tests in this file.  Every array below has
 * MAX_RESOURCES entries.
 */
struct context {
        /* Device filled in by pipe_loader_probe() in init_ctx(). */
        struct pipe_loader_device *dev;
        /* Screen created from `dev' in init_ctx(). */
        struct pipe_screen *screen;
        /* Context created from `screen' in init_ctx(). */
        struct pipe_context *pipe;
        /* Compute state returned by create_compute_state() (see init_prog()). */
        void *hwcs;
        /* Sampler state objects created by init_sampler_states(). */
        void *hwsmp[MAX_RESOURCES];
        /* Resources created by init_tex(), indexed by slot. */
        struct pipe_resource *tex[MAX_RESOURCES];
        /*
         * Per-slot flag recorded by init_tex(); init_compute_resources()
         * copies it into the `writable' field of the surface template.
         */
        bool tex_rw[MAX_RESOURCES];
        /*
         * Sampler views created by init_sampler_views(), indexed by the
         * position in the slot list passed to it (not by slot number).
         */
        struct pipe_sampler_view *view[MAX_RESOURCES];
        /*
         * Surfaces created by init_compute_resources(), indexed by the
         * position in the slot list passed to it (not by slot number).
         */
        struct pipe_surface *surf[MAX_RESOURCES];
};
56 
/*
 * Query compute parameter `c' from ctx->screen and print it as
 * "NAME: { v0 v1 ... }".  The value returned by get_compute_param() is
 * divided by sizeof(uint64_t) to obtain the number of values printed.
 *
 * Expects a `struct context *ctx' to be visible where the macro is
 * expanded.  The first argument `p' is not used by the expansion.
 */
#define DUMP_COMPUTE_PARAM(p, c) do {                                   \
                uint64_t dcp_vals_[4];                                  \
                int dcp_idx_;                                           \
                int dcp_size_ =                                         \
                        ctx->screen->get_compute_param(ctx->screen, c,  \
                                                       dcp_vals_);      \
                                                                        \
                printf("%s: {", #c);                                    \
                                                                        \
                for (dcp_idx_ = 0;                                      \
                     dcp_idx_ < dcp_size_ / sizeof(*dcp_vals_);         \
                     ++dcp_idx_)                                        \
                        printf(" %"PRIu64, dcp_vals_[dcp_idx_]);        \
                                                                        \
                printf(" }\n");                                         \
        } while (0)
69 
init_ctx(struct context * ctx)70 static void init_ctx(struct context *ctx)
71 {
72         int ret;
73 
74         ret = pipe_loader_probe(&ctx->dev, 1);
75         assert(ret);
76 
77         ctx->screen = pipe_loader_create_screen(ctx->dev, PIPE_SEARCH_DIR);
78         assert(ctx->screen);
79 
80         ctx->pipe = ctx->screen->context_create(ctx->screen, NULL);
81         assert(ctx->pipe);
82 
83         DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_GRID_DIMENSION);
84         DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_GRID_SIZE);
85         DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
86 }
87 
destroy_ctx(struct context * ctx)88 static void destroy_ctx(struct context *ctx)
89 {
90         ctx->pipe->destroy(ctx->pipe);
91         ctx->screen->destroy(ctx->screen);
92         pipe_loader_release(&ctx->dev, 1);
93         FREE(ctx);
94 }
95 
/*
 * Run `src' through the C preprocessor and return the result.
 *
 * A small header defining RGLOBAL, RLOCAL, RPRIVATE and RINPUT is
 * prepended to `src'; both are piped into
 * "cpp -P -nostdinc -undef <defs>", whose output is redirected to a
 * temporary file that is then read back.
 *
 * ctx:  not used by this function.
 * src:  NUL-terminated program text.
 * defs: extra command-line arguments for cpp, or NULL for none.  The
 *       string is pasted into a shell command unescaped, so it must only
 *       come from trusted callers.
 *
 * Returns a malloc()ed, NUL-terminated buffer that the caller must
 * free().  All failures are reported through assert(), like the rest of
 * this file.
 */
static char *
preprocess_prog(struct context *ctx, const char *src, const char *defs)
{
        const char header[] =
                "#define RGLOBAL        RES[32767]\n"
                "#define RLOCAL         RES[32766]\n"
                "#define RPRIVATE       RES[32765]\n"
                "#define RINPUT         RES[32764]\n";
        char cmd[512];
        char tmp[] = "/tmp/test-compute.tgsi-XXXXXX";
        char *buf;
        int fd, ret;
        size_t len, written;
        struct stat st;
        FILE *p;

        /* Open a temporary file */
        fd = mkstemp(tmp);
        assert(fd >= 0);

        /* A truncated command line would silently run the wrong thing. */
        ret = snprintf(cmd, sizeof(cmd), "cpp -P -nostdinc -undef %s > %s",
                       defs ? defs : "", tmp);
        assert(ret >= 0 && (size_t)ret < sizeof(cmd));

        /* Preprocess */
        p = popen(cmd, "w");
        assert(p);

        /*
         * Write byte-wise so that an empty string is not reported as a
         * failed write.
         */
        len = strlen(header);
        written = fwrite(header, 1, len, p);
        assert(written == len);

        len = strlen(src);
        written = fwrite(src, 1, len, p);
        assert(written == len);

        ret = pclose(p);
        assert(!ret);

        /* Read back */
        ret = fstat(fd, &st);
        assert(!ret);

        buf = malloc(st.st_size + 1);
        assert(buf);

        ret = read(fd, buf, st.st_size);
        assert(ret == st.st_size);
        buf[ret] = 0;

        /* Clean up */
        close(fd);
        unlink(tmp);

        return buf;
}
139 
init_prog(struct context * ctx,unsigned local_sz,unsigned private_sz,unsigned input_sz,const char * src,const char * defs)140 static void init_prog(struct context *ctx, unsigned local_sz,
141                       unsigned private_sz, unsigned input_sz,
142                       const char *src, const char *defs)
143 {
144         struct pipe_context *pipe = ctx->pipe;
145         struct tgsi_token prog[1024];
146         struct pipe_compute_state cs = {
147                 .prog = prog,
148                 .req_local_mem = local_sz,
149                 .req_private_mem = private_sz,
150                 .req_input_mem = input_sz
151         };
152         char *psrc = preprocess_prog(ctx, src, defs);
153         int ret;
154 
155         ret = tgsi_text_translate(psrc, prog, Elements(prog));
156         assert(ret);
157         free(psrc);
158 
159         ctx->hwcs = pipe->create_compute_state(pipe, &cs);
160         assert(ctx->hwcs);
161 
162         pipe->bind_compute_state(pipe, ctx->hwcs);
163 }
164 
destroy_prog(struct context * ctx)165 static void destroy_prog(struct context *ctx)
166 {
167         struct pipe_context *pipe = ctx->pipe;
168 
169         pipe->delete_compute_state(pipe, ctx->hwcs);
170         ctx->hwcs = NULL;
171 }
172 
init_tex(struct context * ctx,int slot,enum pipe_texture_target target,bool rw,enum pipe_format format,int w,int h,void (* init)(void *,int,int,int))173 static void init_tex(struct context *ctx, int slot,
174                      enum pipe_texture_target target, bool rw,
175                      enum pipe_format format, int w, int h,
176                      void (*init)(void *, int, int, int))
177 {
178         struct pipe_context *pipe = ctx->pipe;
179         struct pipe_resource **tex = &ctx->tex[slot];
180         struct pipe_resource ttex = {
181                 .target = target,
182                 .format = format,
183                 .width0 = w,
184                 .height0 = h,
185                 .depth0 = 1,
186                 .array_size = 1,
187                 .bind = (PIPE_BIND_SAMPLER_VIEW |
188                          PIPE_BIND_COMPUTE_RESOURCE |
189                          PIPE_BIND_GLOBAL)
190         };
191         int dx = util_format_get_blocksize(format);
192         int dy = util_format_get_stride(format, w);
193         int nx = (target == PIPE_BUFFER ? (w / dx) :
194                   util_format_get_nblocksx(format, w));
195         int ny = (target == PIPE_BUFFER ? 1 :
196                   util_format_get_nblocksy(format, h));
197         struct pipe_transfer *xfer;
198         char *map;
199         int x, y;
200 
201         *tex = ctx->screen->resource_create(ctx->screen, &ttex);
202         assert(*tex);
203 
204         xfer = pipe->get_transfer(pipe, *tex, 0, PIPE_TRANSFER_WRITE,
205                                   &(struct pipe_box) { .width = w,
206                                                   .height = h,
207                                                   .depth = 1 });
208         assert(xfer);
209 
210         map = pipe->transfer_map(pipe, xfer);
211         assert(map);
212 
213         for (y = 0; y < ny; ++y) {
214                 for (x = 0; x < nx; ++x) {
215                         init(map + y * dy + x * dx, slot, x, y);
216                 }
217         }
218 
219         pipe->transfer_unmap(pipe, xfer);
220         pipe->transfer_destroy(pipe, xfer);
221 
222         ctx->tex_rw[slot] = rw;
223 }
224 
/* Default comparison for check_tex(): true when the first `sz' bytes match. */
static bool default_check(void *x, void *y, int sz)
{
        return memcmp(x, y, sz) == 0;
}
228 
check_tex(struct context * ctx,int slot,void (* expect)(void *,int,int,int),bool (* check)(void *,void *,int))229 static void check_tex(struct context *ctx, int slot,
230                       void (*expect)(void *, int, int, int),
231                       bool (*check)(void *, void *, int))
232 {
233         struct pipe_context *pipe = ctx->pipe;
234         struct pipe_resource *tex = ctx->tex[slot];
235         int dx = util_format_get_blocksize(tex->format);
236         int dy = util_format_get_stride(tex->format, tex->width0);
237         int nx = (tex->target == PIPE_BUFFER ? (tex->width0 / dx) :
238                   util_format_get_nblocksx(tex->format, tex->width0));
239         int ny = (tex->target == PIPE_BUFFER ? 1 :
240                   util_format_get_nblocksy(tex->format, tex->height0));
241         struct pipe_transfer *xfer;
242         char *map;
243         int x, y, i;
244         int err = 0;
245 
246         if (!check)
247                 check = default_check;
248 
249         xfer = pipe->get_transfer(pipe, tex, 0, PIPE_TRANSFER_READ,
250                                   &(struct pipe_box) { .width = tex->width0,
251                                         .height = tex->height0,
252                                         .depth = 1 });
253         assert(xfer);
254 
255         map = pipe->transfer_map(pipe, xfer);
256         assert(map);
257 
258         for (y = 0; y < ny; ++y) {
259                 for (x = 0; x < nx; ++x) {
260                         uint32_t exp[4];
261                         uint32_t *res = (uint32_t *)(map + y * dy + x * dx);
262 
263                         expect(exp, slot, x, y);
264                         if (check(res, exp, dx) || (++err) > 20)
265                                 continue;
266 
267                         if (dx < 4) {
268                                 uint32_t u = 0, v = 0;
269 
270                                 for (i = 0; i < dx; i++) {
271                                         u |= ((uint8_t *)exp)[i] << (8 * i);
272                                         v |= ((uint8_t *)res)[i] << (8 * i);
273                                 }
274                                 printf("(%d, %d): got 0x%x, expected 0x%x\n",
275                                        x, y, v, u);
276                         } else {
277                                 for (i = 0; i < dx / 4; i++) {
278                                         printf("(%d, %d)[%d]: got 0x%x/%f,"
279                                                " expected 0x%x/%f\n", x, y, i,
280                                                res[i], ((float *)res)[i],
281                                                exp[i], ((float *)exp)[i]);
282                                 }
283                         }
284                 }
285         }
286 
287         pipe->transfer_unmap(pipe, xfer);
288         pipe->transfer_destroy(pipe, xfer);
289 
290         if (err)
291                 printf("(%d, %d): \x1b[31mFAIL\x1b[0m (%d)\n", x, y, err);
292         else
293                 printf("(%d, %d): \x1b[32mOK\x1b[0m\n", x, y);
294 }
295 
destroy_tex(struct context * ctx)296 static void destroy_tex(struct context *ctx)
297 {
298         int i;
299 
300         for (i = 0; i < MAX_RESOURCES; ++i) {
301                 if (ctx->tex[i])
302                         pipe_resource_reference(&ctx->tex[i], NULL);
303         }
304 }
305 
init_sampler_views(struct context * ctx,const int * slots)306 static void init_sampler_views(struct context *ctx, const int *slots)
307 {
308         struct pipe_context *pipe = ctx->pipe;
309         struct pipe_sampler_view tview;
310         int i;
311 
312         for (i = 0; *slots >= 0; ++i, ++slots) {
313                 u_sampler_view_default_template(&tview, ctx->tex[*slots],
314                                                 ctx->tex[*slots]->format);
315 
316                 ctx->view[i] = pipe->create_sampler_view(pipe, ctx->tex[*slots],
317                                                          &tview);
318                 assert(ctx->view[i]);
319         }
320 
321         pipe->set_compute_sampler_views(pipe, 0, i, ctx->view);
322 }
323 
destroy_sampler_views(struct context * ctx)324 static void destroy_sampler_views(struct context *ctx)
325 {
326         struct pipe_context *pipe = ctx->pipe;
327         int i;
328 
329         pipe->set_compute_sampler_views(pipe, 0, MAX_RESOURCES, NULL);
330 
331         for (i = 0; i < MAX_RESOURCES; ++i) {
332                 if (ctx->view[i]) {
333                         pipe->sampler_view_destroy(pipe, ctx->view[i]);
334                         ctx->view[i] = NULL;
335                 }
336         }
337 }
338 
init_compute_resources(struct context * ctx,const int * slots)339 static void init_compute_resources(struct context *ctx, const int *slots)
340 {
341         struct pipe_context *pipe = ctx->pipe;
342         int i;
343 
344         for (i = 0; *slots >= 0; ++i, ++slots) {
345                 struct pipe_surface tsurf = {
346                         .format = ctx->tex[*slots]->format,
347                         .usage = ctx->tex[*slots]->bind,
348                         .writable = ctx->tex_rw[*slots]
349                 };
350 
351                 if (ctx->tex[*slots]->target == PIPE_BUFFER)
352                         tsurf.u.buf.last_element = ctx->tex[*slots]->width0 - 1;
353 
354                 ctx->surf[i] = pipe->create_surface(pipe, ctx->tex[*slots],
355                                                     &tsurf);
356                 assert(ctx->surf[i]);
357         }
358 
359         pipe->set_compute_resources(pipe, 0, i, ctx->surf);
360 }
361 
destroy_compute_resources(struct context * ctx)362 static void destroy_compute_resources(struct context *ctx)
363 {
364         struct pipe_context *pipe = ctx->pipe;
365         int i;
366 
367         pipe->set_compute_resources(pipe, 0, MAX_RESOURCES, NULL);
368 
369         for (i = 0; i < MAX_RESOURCES; ++i) {
370                 if (ctx->surf[i]) {
371                         pipe->surface_destroy(pipe, ctx->surf[i]);
372                         ctx->surf[i] = NULL;
373                 }
374         }
375 }
376 
init_sampler_states(struct context * ctx,int n)377 static void init_sampler_states(struct context *ctx, int n)
378 {
379         struct pipe_context *pipe = ctx->pipe;
380         struct pipe_sampler_state smp = {
381                 .normalized_coords = 1,
382         };
383         int i;
384 
385         for (i = 0; i < n; ++i) {
386                 ctx->hwsmp[i] = pipe->create_sampler_state(pipe, &smp);
387                 assert(ctx->hwsmp[i]);
388         }
389 
390         pipe->bind_compute_sampler_states(pipe, 0, i, ctx->hwsmp);
391 }
392 
destroy_sampler_states(struct context * ctx)393 static void destroy_sampler_states(struct context *ctx)
394 {
395         struct pipe_context *pipe = ctx->pipe;
396         int i;
397 
398         pipe->bind_compute_sampler_states(pipe, 0, MAX_RESOURCES, NULL);
399 
400         for (i = 0; i < MAX_RESOURCES; ++i) {
401                 if (ctx->hwsmp[i]) {
402                         pipe->delete_sampler_state(pipe, ctx->hwsmp[i]);
403                         ctx->hwsmp[i] = NULL;
404                 }
405         }
406 }
407 
init_globals(struct context * ctx,const int * slots,uint32_t ** handles)408 static void init_globals(struct context *ctx, const int *slots,
409                          uint32_t **handles)
410 {
411         struct pipe_context *pipe = ctx->pipe;
412         struct pipe_resource *res[MAX_RESOURCES];
413         int i;
414 
415         for (i = 0; *slots >= 0; ++i, ++slots)
416                 res[i] = ctx->tex[*slots];
417 
418         pipe->set_global_binding(pipe, 0, i, res, handles);
419 }
420 
destroy_globals(struct context * ctx)421 static void destroy_globals(struct context *ctx)
422 {
423         struct pipe_context *pipe = ctx->pipe;
424 
425         pipe->set_global_binding(pipe, 0, MAX_RESOURCES, NULL, NULL);
426 }
427 
launch_grid(struct context * ctx,const uint * block_layout,const uint * grid_layout,uint32_t pc,const void * input)428 static void launch_grid(struct context *ctx, const uint *block_layout,
429                         const uint *grid_layout, uint32_t pc,
430                         const void *input)
431 {
432         struct pipe_context *pipe = ctx->pipe;
433 
434         pipe->launch_grid(pipe, block_layout, grid_layout, pc, input);
435 }
436 
test_system_values(struct context * ctx)437 static void test_system_values(struct context *ctx)
438 {
439         const char *src = "COMP\n"
440                 "DCL RES[0], BUFFER, RAW, WR\n"
441                 "DCL SV[0], BLOCK_ID[0]\n"
442                 "DCL SV[1], BLOCK_SIZE[0]\n"
443                 "DCL SV[2], GRID_SIZE[0]\n"
444                 "DCL SV[3], THREAD_ID[0]\n"
445                 "DCL TEMP[0], LOCAL\n"
446                 "DCL TEMP[1], LOCAL\n"
447                 "IMM UINT32 { 64, 0, 0, 0 }\n"
448                 "IMM UINT32 { 16, 0, 0, 0 }\n"
449                 "IMM UINT32 { 0, 0, 0, 0 }\n"
450                 "\n"
451                 "BGNSUB"
452                 "  UMUL TEMP[0], SV[0], SV[1]\n"
453                 "  UADD TEMP[0], TEMP[0], SV[3]\n"
454                 "  UMUL TEMP[1], SV[1], SV[2]\n"
455                 "  UMUL TEMP[0].w, TEMP[0], TEMP[1].zzzz\n"
456                 "  UMUL TEMP[0].zw, TEMP[0], TEMP[1].yyyy\n"
457                 "  UMUL TEMP[0].yzw, TEMP[0], TEMP[1].xxxx\n"
458                 "  UADD TEMP[0].xy, TEMP[0].xyxy, TEMP[0].zwzw\n"
459                 "  UADD TEMP[0].x, TEMP[0].xxxx, TEMP[0].yyyy\n"
460                 "  UMUL TEMP[0].x, TEMP[0], IMM[0]\n"
461                 "  STORE RES[0].xyzw, TEMP[0], SV[0]\n"
462                 "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
463                 "  STORE RES[0].xyzw, TEMP[0], SV[1]\n"
464                 "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
465                 "  STORE RES[0].xyzw, TEMP[0], SV[2]\n"
466                 "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
467                 "  STORE RES[0].xyzw, TEMP[0], SV[3]\n"
468                 "  RET\n"
469                 "ENDSUB\n";
470         void init(void *p, int s, int x, int y) {
471                 *(uint32_t *)p = 0xdeadbeef;
472         }
473         void expect(void *p, int s, int x, int y) {
474                 int id = x / 16, sv = (x % 16) / 4, c = x % 4;
475                 int tid[] = { id % 20, (id % 240) / 20, id / 240, 0 };
476                 int bsz[] = { 4, 3, 5, 1};
477                 int gsz[] = { 5, 4, 1, 1};
478 
479                 switch (sv) {
480                 case 0:
481                         *(uint32_t *)p = tid[c] / bsz[c];
482                         break;
483                 case 1:
484                         *(uint32_t *)p = bsz[c];
485                         break;
486                 case 2:
487                         *(uint32_t *)p = gsz[c];
488                         break;
489                 case 3:
490                         *(uint32_t *)p = tid[c] % bsz[c];
491                         break;
492                 }
493         }
494 
495         printf("- %s\n", __func__);
496 
497         init_prog(ctx, 0, 0, 0, src, NULL);
498         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
499                  76800, 0, init);
500         init_compute_resources(ctx, (int []) { 0, -1 });
501         launch_grid(ctx, (uint []){4, 3, 5}, (uint []){5, 4, 1}, 0, NULL);
502         check_tex(ctx, 0, expect, NULL);
503         destroy_compute_resources(ctx);
504         destroy_tex(ctx);
505         destroy_prog(ctx);
506 }
507 
test_resource_access(struct context * ctx)508 static void test_resource_access(struct context *ctx)
509 {
510         const char *src = "COMP\n"
511                 "DCL RES[0], BUFFER, RAW, WR\n"
512                 "DCL RES[1], 2D, RAW, WR\n"
513                 "DCL SV[0], BLOCK_ID[0]\n"
514                 "DCL TEMP[0], LOCAL\n"
515                 "DCL TEMP[1], LOCAL\n"
516                 "IMM UINT32 { 15, 0, 0, 0 }\n"
517                 "IMM UINT32 { 16, 1, 0, 0 }\n"
518                 "\n"
519                 "    BGNSUB\n"
520                 "       UADD TEMP[0].x, SV[0].xxxx, SV[0].yyyy\n"
521                 "       AND TEMP[0].x, TEMP[0], IMM[0]\n"
522                 "       UMUL TEMP[0].x, TEMP[0], IMM[1]\n"
523                 "       LOAD TEMP[0].xyzw, RES[0], TEMP[0]\n"
524                 "       UMUL TEMP[1], SV[0], IMM[1]\n"
525                 "       STORE RES[1].xyzw, TEMP[1], TEMP[0]\n"
526                 "       RET\n"
527                 "    ENDSUB\n";
528         void init0(void *p, int s, int x, int y) {
529                 *(float *)p = 8.0 - (float)x;
530         }
531         void init1(void *p, int s, int x, int y) {
532                 *(uint32_t *)p = 0xdeadbeef;
533         }
534         void expect(void *p, int s, int x, int y) {
535                 *(float *)p = 8.0 - (float)((x + 4*y) & 0x3f);
536         }
537 
538         printf("- %s\n", __func__);
539 
540         init_prog(ctx, 0, 0, 0, src, NULL);
541         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
542                  256, 0, init0);
543         init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
544                  60, 12, init1);
545         init_compute_resources(ctx, (int []) { 0, 1, -1 });
546         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){15, 12, 1}, 0, NULL);
547         check_tex(ctx, 1, expect, NULL);
548         destroy_compute_resources(ctx);
549         destroy_tex(ctx);
550         destroy_prog(ctx);
551 }
552 
test_function_calls(struct context * ctx)553 static void test_function_calls(struct context *ctx)
554 {
555         const char *src = "COMP\n"
556                 "DCL RES[0], 2D, RAW, WR\n"
557                 "DCL SV[0], BLOCK_ID[0]\n"
558                 "DCL SV[1], BLOCK_SIZE[0]\n"
559                 "DCL SV[2], GRID_SIZE[0]\n"
560                 "DCL SV[3], THREAD_ID[0]\n"
561                 "DCL TEMP[0]\n"
562                 "DCL TEMP[1]\n"
563                 "DCL TEMP[2], LOCAL\n"
564                 "IMM UINT32 { 0, 11, 22, 33 }\n"
565                 "IMM FLT32 { 11, 33, 55, 99 }\n"
566                 "IMM UINT32 { 4, 1, 0, 0 }\n"
567                 "IMM UINT32 { 12, 0, 0, 0 }\n"
568                 "\n"
569                 "00: BGNSUB\n"
570                 "01:  UMUL TEMP[0].x, TEMP[0], TEMP[0]\n"
571                 "02:  UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n"
572                 "03:  USLT TEMP[0].x, TEMP[0], IMM[0]\n"
573                 "04:  RET\n"
574                 "05: ENDSUB\n"
575                 "06: BGNSUB\n"
576                 "07:  UMUL TEMP[0].x, TEMP[0], TEMP[0]\n"
577                 "08:  UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n"
578                 "09:  USLT TEMP[0].x, TEMP[0], IMM[0].yyyy\n"
579                 "10:  IF TEMP[0].xxxx\n"
580                 "11:   CAL :0\n"
581                 "12:  ENDIF\n"
582                 "13:  RET\n"
583                 "14: ENDSUB\n"
584                 "15: BGNSUB\n"
585                 "16:  UMUL TEMP[2], SV[0], SV[1]\n"
586                 "17:  UADD TEMP[2], TEMP[2], SV[3]\n"
587                 "18:  UMUL TEMP[2], TEMP[2], IMM[2]\n"
588                 "00:  MOV TEMP[1].x, IMM[2].wwww\n"
589                 "19:  LOAD TEMP[0].x, RES[0].xxxx, TEMP[2]\n"
590                 "20:  CAL :6\n"
591                 "21:  STORE RES[0].x, TEMP[2], TEMP[1].xxxx\n"
592                 "22:  RET\n"
593                 "23: ENDSUB\n";
594         void init(void *p, int s, int x, int y) {
595                 *(uint32_t *)p = 15 * y + x;
596         }
597         void expect(void *p, int s, int x, int y) {
598                 *(uint32_t *)p = (15 * y + x) < 4 ? 2 : 1 ;
599         }
600 
601         printf("- %s\n", __func__);
602 
603         init_prog(ctx, 0, 0, 0, src, NULL);
604         init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
605                  15, 12, init);
606         init_compute_resources(ctx, (int []) { 0, -1 });
607         launch_grid(ctx, (uint []){3, 3, 3}, (uint []){5, 4, 1}, 15, NULL);
608         check_tex(ctx, 0, expect, NULL);
609         destroy_compute_resources(ctx);
610         destroy_tex(ctx);
611         destroy_prog(ctx);
612 }
613 
test_input_global(struct context * ctx)614 static void test_input_global(struct context *ctx)
615 {
616         const char *src = "COMP\n"
617                 "DCL SV[0], THREAD_ID[0]\n"
618                 "DCL TEMP[0], LOCAL\n"
619                 "DCL TEMP[1], LOCAL\n"
620                 "IMM UINT32 { 8, 0, 0, 0 }\n"
621                 "\n"
622                 "    BGNSUB\n"
623                 "       UMUL TEMP[0], SV[0], IMM[0]\n"
624                 "       LOAD TEMP[1].xy, RINPUT, TEMP[0]\n"
625                 "       LOAD TEMP[0].x, RGLOBAL, TEMP[1].yyyy\n"
626                 "       UADD TEMP[1].x, TEMP[0], -TEMP[1]\n"
627                 "       STORE RGLOBAL.x, TEMP[1].yyyy, TEMP[1]\n"
628                 "       RET\n"
629                 "    ENDSUB\n";
630         void init(void *p, int s, int x, int y) {
631                 *(uint32_t *)p = 0xdeadbeef;
632         }
633         void expect(void *p, int s, int x, int y) {
634                 *(uint32_t *)p = 0xdeadbeef - (x == 0 ? 0x10001 + 2 * s : 0);
635         }
636         uint32_t input[8] = { 0x10001, 0x10002, 0x10003, 0x10004,
637                               0x10005, 0x10006, 0x10007, 0x10008 };
638 
639         printf("- %s\n", __func__);
640 
641         init_prog(ctx, 0, 0, 32, src, NULL);
642         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init);
643         init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init);
644         init_tex(ctx, 2, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init);
645         init_tex(ctx, 3, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init);
646         init_globals(ctx, (int []){ 0, 1, 2, 3, -1 },
647                      (uint32_t *[]){ &input[1], &input[3],
648                                      &input[5], &input[7] });
649         launch_grid(ctx, (uint []){4, 1, 1}, (uint []){1, 1, 1}, 0, input);
650         check_tex(ctx, 0, expect, NULL);
651         check_tex(ctx, 1, expect, NULL);
652         check_tex(ctx, 2, expect, NULL);
653         check_tex(ctx, 3, expect, NULL);
654         destroy_globals(ctx);
655         destroy_tex(ctx);
656         destroy_prog(ctx);
657 }
658 
test_private(struct context * ctx)659 static void test_private(struct context *ctx)
660 {
661         const char *src = "COMP\n"
662                 "DCL RES[0], BUFFER, RAW, WR\n"
663                 "DCL SV[0], BLOCK_ID[0]\n"
664                 "DCL SV[1], BLOCK_SIZE[0]\n"
665                 "DCL SV[2], THREAD_ID[0]\n"
666                 "DCL TEMP[0], LOCAL\n"
667                 "DCL TEMP[1], LOCAL\n"
668                 "DCL TEMP[2], LOCAL\n"
669                 "IMM UINT32 { 128, 0, 0, 0 }\n"
670                 "IMM UINT32 { 4, 0, 0, 0 }\n"
671                 "\n"
672                 "    BGNSUB\n"
673                 "       UMUL TEMP[0].x, SV[0], SV[1]\n"
674                 "       UADD TEMP[0].x, TEMP[0], SV[2]\n"
675                 "       MOV TEMP[1].x, IMM[0].wwww\n"
676                 "       BGNLOOP\n"
677                 "               USEQ TEMP[2].x, TEMP[1], IMM[0]\n"
678                 "               IF TEMP[2]\n"
679                 "                       BRK\n"
680                 "               ENDIF\n"
681                 "               UDIV TEMP[2].x, TEMP[1], IMM[1]\n"
682                 "               UADD TEMP[2].x, TEMP[2], TEMP[0]\n"
683                 "               STORE RPRIVATE.x, TEMP[1], TEMP[2]\n"
684                 "               UADD TEMP[1].x, TEMP[1], IMM[1]\n"
685                 "       ENDLOOP\n"
686                 "       MOV TEMP[1].x, IMM[0].wwww\n"
687                 "       UMUL TEMP[0].x, TEMP[0], IMM[0]\n"
688                 "       BGNLOOP\n"
689                 "               USEQ TEMP[2].x, TEMP[1], IMM[0]\n"
690                 "               IF TEMP[2]\n"
691                 "                       BRK\n"
692                 "               ENDIF\n"
693                 "               LOAD TEMP[2].x, RPRIVATE, TEMP[1]\n"
694                 "               STORE RES[0].x, TEMP[0], TEMP[2]\n"
695                 "               UADD TEMP[0].x, TEMP[0], IMM[1]\n"
696                 "               UADD TEMP[1].x, TEMP[1], IMM[1]\n"
697                 "       ENDLOOP\n"
698                 "       RET\n"
699                 "    ENDSUB\n";
700         void init(void *p, int s, int x, int y) {
701                 *(uint32_t *)p = 0xdeadbeef;
702         }
703         void expect(void *p, int s, int x, int y) {
704                 *(uint32_t *)p = (x / 32) + x % 32;
705         }
706 
707         printf("- %s\n", __func__);
708 
709         init_prog(ctx, 0, 128, 0, src, NULL);
710         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
711                  32768, 0, init);
712         init_compute_resources(ctx, (int []) { 0, -1 });
713         launch_grid(ctx, (uint []){16, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
714         check_tex(ctx, 0, expect, NULL);
715         destroy_compute_resources(ctx);
716         destroy_tex(ctx);
717         destroy_prog(ctx);
718 }
719 
test_local(struct context * ctx)720 static void test_local(struct context *ctx)
721 {
722         const char *src = "COMP\n"
723                 "DCL RES[0], BUFFER, RAW, WR\n"
724                 "DCL SV[0], BLOCK_ID[0]\n"
725                 "DCL SV[1], BLOCK_SIZE[0]\n"
726                 "DCL SV[2], THREAD_ID[0]\n"
727                 "DCL TEMP[0], LOCAL\n"
728                 "DCL TEMP[1], LOCAL\n"
729                 "DCL TEMP[2], LOCAL\n"
730                 "IMM UINT32 { 1, 0, 0, 0 }\n"
731                 "IMM UINT32 { 2, 0, 0, 0 }\n"
732                 "IMM UINT32 { 4, 0, 0, 0 }\n"
733                 "IMM UINT32 { 32, 0, 0, 0 }\n"
734                 "IMM UINT32 { 128, 0, 0, 0 }\n"
735                 "\n"
736                 "    BGNSUB\n"
737                 "       UMUL TEMP[0].x, SV[2], IMM[2]\n"
738                 "       STORE RLOCAL.x, TEMP[0], IMM[0].wwww\n"
739                 "       MFENCE RLOCAL\n"
740                 "       USLT TEMP[1].x, SV[2], IMM[3]\n"
741                 "       IF TEMP[1]\n"
742                 "               UADD TEMP[1].x, TEMP[0], IMM[4]\n"
743                 "               BGNLOOP\n"
744                 "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
745                 "                       USEQ TEMP[2].x, TEMP[2], IMM[0]\n"
746                 "                       IF TEMP[2]\n"
747                 "                               BRK\n"
748                 "                       ENDIF\n"
749                 "               ENDLOOP\n"
750                 "               STORE RLOCAL.x, TEMP[0], IMM[0]\n"
751                 "               MFENCE RLOCAL\n"
752                 "               BGNLOOP\n"
753                 "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
754                 "                       USEQ TEMP[2].x, TEMP[2], IMM[1]\n"
755                 "                       IF TEMP[2]\n"
756                 "                               BRK\n"
757                 "                       ENDIF\n"
758                 "               ENDLOOP\n"
759                 "       ELSE\n"
760                 "               UADD TEMP[1].x, TEMP[0], -IMM[4]\n"
761                 "               BGNLOOP\n"
762                 "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
763                 "                       USEQ TEMP[2].x, TEMP[2], IMM[0].wwww\n"
764                 "                       IF TEMP[2]\n"
765                 "                               BRK\n"
766                 "                       ENDIF\n"
767                 "               ENDLOOP\n"
768                 "               STORE RLOCAL.x, TEMP[0], IMM[0]\n"
769                 "               MFENCE RLOCAL\n"
770                 "               BGNLOOP\n"
771                 "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
772                 "                       USEQ TEMP[2].x, TEMP[2], IMM[0]\n"
773                 "                       IF TEMP[2]\n"
774                 "                               BRK\n"
775                 "                       ENDIF\n"
776                 "               ENDLOOP\n"
777                 "               STORE RLOCAL.x, TEMP[0], IMM[1]\n"
778                 "               MFENCE RLOCAL\n"
779                 "       ENDIF\n"
780                 "       UMUL TEMP[1].x, SV[0], SV[1]\n"
781                 "       UMUL TEMP[1].x, TEMP[1], IMM[2]\n"
782                 "       UADD TEMP[1].x, TEMP[1], TEMP[0]\n"
783                 "       LOAD TEMP[0].x, RLOCAL, TEMP[0]\n"
784                 "       STORE RES[0].x, TEMP[1], TEMP[0]\n"
785                 "       RET\n"
786                 "    ENDSUB\n";
787         void init(void *p, int s, int x, int y) {
788                 *(uint32_t *)p = 0xdeadbeef;
789         }
790         void expect(void *p, int s, int x, int y) {
791                 *(uint32_t *)p = x & 0x20 ? 2 : 1;
792         }
793 
794         printf("- %s\n", __func__);
795 
796         init_prog(ctx, 256, 0, 0, src, NULL);
797         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
798                  4096, 0, init);
799         init_compute_resources(ctx, (int []) { 0, -1 });
800         launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
801         check_tex(ctx, 0, expect, NULL);
802         destroy_compute_resources(ctx);
803         destroy_tex(ctx);
804         destroy_prog(ctx);
805 }
806 
test_sample(struct context * ctx)807 static void test_sample(struct context *ctx)
808 {
809         const char *src = "COMP\n"
810                 "DCL SVIEW[0], 2D, FLOAT\n"
811                 "DCL RES[0], 2D, RAW, WR\n"
812                 "DCL SAMP[0]\n"
813                 "DCL SV[0], BLOCK_ID[0]\n"
814                 "DCL TEMP[0], LOCAL\n"
815                 "DCL TEMP[1], LOCAL\n"
816                 "IMM UINT32 { 16, 1, 0, 0 }\n"
817                 "IMM FLT32 { 128, 32, 0, 0 }\n"
818                 "\n"
819                 "    BGNSUB\n"
820                 "       I2F TEMP[1], SV[0]\n"
821                 "       DIV TEMP[1], TEMP[1], IMM[1]\n"
822                 "       SAMPLE TEMP[1], TEMP[1], SVIEW[0], SAMP[0]\n"
823                 "       UMUL TEMP[0], SV[0], IMM[0]\n"
824                 "       STORE RES[0].xyzw, TEMP[0], TEMP[1]\n"
825                 "       RET\n"
826                 "    ENDSUB\n";
827         void init(void *p, int s, int x, int y) {
828                 *(float *)p = s ? 1 : x * y;
829         }
830         void expect(void *p, int s, int x, int y) {
831                 switch (x % 4) {
832                 case 0:
833                         *(float *)p = x / 4 * y;
834                         break;
835                 case 1:
836                 case 2:
837                         *(float *)p = 0;
838                         break;
839                 case 3:
840                         *(float *)p = 1;
841                         break;
842                 }
843         }
844 
845         printf("- %s\n", __func__);
846 
847         init_prog(ctx, 0, 0, 0, src, NULL);
848         init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
849                  128, 32, init);
850         init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
851                  512, 32, init);
852         init_compute_resources(ctx, (int []) { 1, -1 });
853         init_sampler_views(ctx, (int []) { 0, -1 });
854         init_sampler_states(ctx, 2);
855         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, NULL);
856         check_tex(ctx, 1, expect, NULL);
857         destroy_sampler_states(ctx);
858         destroy_sampler_views(ctx);
859         destroy_compute_resources(ctx);
860         destroy_tex(ctx);
861         destroy_prog(ctx);
862 }
863 
test_many_kern(struct context * ctx)864 static void test_many_kern(struct context *ctx)
865 {
866         const char *src = "COMP\n"
867                 "DCL RES[0], BUFFER, RAW, WR\n"
868                 "DCL TEMP[0], LOCAL\n"
869                 "IMM UINT32 { 0, 1, 2, 3 }\n"
870                 "IMM UINT32 { 4, 0, 0, 0 }\n"
871                 "\n"
872                 "    BGNSUB\n"
873                 "       UMUL TEMP[0].x, IMM[0].xxxx, IMM[1].xxxx\n"
874                 "       STORE RES[0].x, TEMP[0], IMM[0].xxxx\n"
875                 "       RET\n"
876                 "    ENDSUB\n"
877                 "    BGNSUB\n"
878                 "       UMUL TEMP[0].x, IMM[0].yyyy, IMM[1].xxxx\n"
879                 "       STORE RES[0].x, TEMP[0], IMM[0].yyyy\n"
880                 "       RET\n"
881                 "    ENDSUB\n"
882                 "    BGNSUB\n"
883                 "       UMUL TEMP[0].x, IMM[0].zzzz, IMM[1].xxxx\n"
884                 "       STORE RES[0].x, TEMP[0], IMM[0].zzzz\n"
885                 "       RET\n"
886                 "    ENDSUB\n"
887                 "    BGNSUB\n"
888                 "       UMUL TEMP[0].x, IMM[0].wwww, IMM[1].xxxx\n"
889                 "       STORE RES[0].x, TEMP[0], IMM[0].wwww\n"
890                 "       RET\n"
891                 "    ENDSUB\n";
892         void init(void *p, int s, int x, int y) {
893                 *(uint32_t *)p = 0xdeadbeef;
894         }
895         void expect(void *p, int s, int x, int y) {
896                 *(uint32_t *)p = x;
897         }
898 
899         printf("- %s\n", __func__);
900 
901         init_prog(ctx, 0, 0, 0, src, NULL);
902         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
903                  16, 0, init);
904         init_compute_resources(ctx, (int []) { 0, -1 });
905         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 0, NULL);
906         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 5, NULL);
907         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 10, NULL);
908         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 15, NULL);
909         check_tex(ctx, 0, expect, NULL);
910         destroy_compute_resources(ctx);
911         destroy_tex(ctx);
912         destroy_prog(ctx);
913 }
914 
test_constant(struct context * ctx)915 static void test_constant(struct context *ctx)
916 {
917         const char *src = "COMP\n"
918                 "DCL RES[0], BUFFER, RAW\n"
919                 "DCL RES[1], BUFFER, RAW, WR\n"
920                 "DCL SV[0], BLOCK_ID[0]\n"
921                 "DCL TEMP[0], LOCAL\n"
922                 "DCL TEMP[1], LOCAL\n"
923                 "IMM UINT32 { 4, 0, 0, 0 }\n"
924                 "\n"
925                 "    BGNSUB\n"
926                 "       UMUL TEMP[0].x, SV[0], IMM[0]\n"
927                 "       LOAD TEMP[1].x, RES[0], TEMP[0]\n"
928                 "       STORE RES[1].x, TEMP[0], TEMP[1]\n"
929                 "       RET\n"
930                 "    ENDSUB\n";
931         void init(void *p, int s, int x, int y) {
932                 *(float *)p = s ? 0xdeadbeef : 8.0 - (float)x;
933         }
934         void expect(void *p, int s, int x, int y) {
935                 *(float *)p = 8.0 - (float)x;
936         }
937 
938         printf("- %s\n", __func__);
939 
940         init_prog(ctx, 0, 0, 0, src, NULL);
941         init_tex(ctx, 0, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
942                  256, 0, init);
943         init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
944                  256, 0, init);
945         init_compute_resources(ctx, (int []) { 0, 1, -1 });
946         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL);
947         check_tex(ctx, 1, expect, NULL);
948         destroy_compute_resources(ctx);
949         destroy_tex(ctx);
950         destroy_prog(ctx);
951 }
952 
test_resource_indirect(struct context * ctx)953 static void test_resource_indirect(struct context *ctx)
954 {
955         const char *src = "COMP\n"
956                 "DCL RES[0], BUFFER, RAW, WR\n"
957                 "DCL RES[1..3], BUFFER, RAW\n"
958                 "DCL SV[0], BLOCK_ID[0]\n"
959                 "DCL TEMP[0], LOCAL\n"
960                 "DCL TEMP[1], LOCAL\n"
961                 "IMM UINT32 { 4, 0, 0, 0 }\n"
962                 "\n"
963                 "    BGNSUB\n"
964                 "       UMUL TEMP[0].x, SV[0], IMM[0]\n"
965                 "       LOAD TEMP[1].x, RES[1], TEMP[0]\n"
966                 "       LOAD TEMP[1].x, RES[TEMP[1].x+2], TEMP[0]\n"
967                 "       STORE RES[0].x, TEMP[0], TEMP[1]\n"
968                 "       RET\n"
969                 "    ENDSUB\n";
970         void init(void *p, int s, int x, int y) {
971                 *(uint32_t *)p = s == 0 ? 0xdeadbeef :
972                    s == 1 ? x % 2 :
973                    s == 2 ? 2 * x :
974                    2 * x + 1;
975         }
976         void expect(void *p, int s, int x, int y) {
977            *(uint32_t *)p = 2 * x + (x % 2 ? 1 : 0);
978         }
979 
980         printf("- %s\n", __func__);
981 
982         init_prog(ctx, 0, 0, 0, src, NULL);
983         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
984                  256, 0, init);
985         init_tex(ctx, 1, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
986                  256, 0, init);
987         init_tex(ctx, 2, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
988                  256, 0, init);
989         init_tex(ctx, 3, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
990                  256, 0, init);
991         init_compute_resources(ctx, (int []) { 0, 1, 2, 3, -1 });
992         launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL);
993         check_tex(ctx, 0, expect, NULL);
994         destroy_compute_resources(ctx);
995         destroy_tex(ctx);
996         destroy_prog(ctx);
997 }
998 
999 enum pipe_format surface_fmts[] = {
1000         PIPE_FORMAT_B8G8R8A8_UNORM,
1001         PIPE_FORMAT_B8G8R8X8_UNORM,
1002         PIPE_FORMAT_A8R8G8B8_UNORM,
1003         PIPE_FORMAT_X8R8G8B8_UNORM,
1004         PIPE_FORMAT_X8R8G8B8_UNORM,
1005         PIPE_FORMAT_L8_UNORM,
1006         PIPE_FORMAT_A8_UNORM,
1007         PIPE_FORMAT_I8_UNORM,
1008         PIPE_FORMAT_L8A8_UNORM,
1009         PIPE_FORMAT_R32_FLOAT,
1010         PIPE_FORMAT_R32G32_FLOAT,
1011         PIPE_FORMAT_R32G32B32A32_FLOAT,
1012         PIPE_FORMAT_R32_UNORM,
1013         PIPE_FORMAT_R32G32_UNORM,
1014         PIPE_FORMAT_R32G32B32A32_UNORM,
1015         PIPE_FORMAT_R32_SNORM,
1016         PIPE_FORMAT_R32G32_SNORM,
1017         PIPE_FORMAT_R32G32B32A32_SNORM,
1018         PIPE_FORMAT_R8_UINT,
1019         PIPE_FORMAT_R8G8_UINT,
1020         PIPE_FORMAT_R8G8B8A8_UINT,
1021         PIPE_FORMAT_R8_SINT,
1022         PIPE_FORMAT_R8G8_SINT,
1023         PIPE_FORMAT_R8G8B8A8_SINT,
1024         PIPE_FORMAT_R32_UINT,
1025         PIPE_FORMAT_R32G32_UINT,
1026         PIPE_FORMAT_R32G32B32A32_UINT,
1027         PIPE_FORMAT_R32_SINT,
1028         PIPE_FORMAT_R32G32_SINT,
1029         PIPE_FORMAT_R32G32B32A32_SINT
1030 };
1031 
test_surface_ld(struct context * ctx)1032 static void test_surface_ld(struct context *ctx)
1033 {
1034         const char *src = "COMP\n"
1035                 "DCL RES[0], 2D\n"
1036                 "DCL RES[1], 2D, RAW, WR\n"
1037                 "DCL SV[0], BLOCK_ID[0]\n"
1038                 "DCL TEMP[0], LOCAL\n"
1039                 "DCL TEMP[1], LOCAL\n"
1040                 "IMM UINT32 { 16, 1, 0, 0 }\n"
1041                 "\n"
1042                 "    BGNSUB\n"
1043                 "       LOAD TEMP[1], RES[0], SV[0]\n"
1044                 "       UMUL TEMP[0], SV[0], IMM[0]\n"
1045                 "       STORE RES[1].xyzw, TEMP[0], TEMP[1]\n"
1046                 "       RET\n"
1047                 "    ENDSUB\n";
1048         int i = 0;
1049         void init0f(void *p, int s, int x, int y) {
1050                 float v[] = { 1.0, -.75, .50, -.25 };
1051                 util_format_write_4f(surface_fmts[i], v, 0,
1052                                      p, 0, 0, 0, 1, 1);
1053         }
1054         void init0i(void *p, int s, int x, int y) {
1055                 int v[] = { 0xffffffff, 0xffff, 0xff, 0xf };
1056                 util_format_write_4i(surface_fmts[i], v, 0,
1057                                      p, 0, 0, 0, 1, 1);
1058         }
1059         void init1(void *p, int s, int x, int y) {
1060                 *(uint32_t *)p = 0xdeadbeef;
1061         }
1062         void expectf(void *p, int s, int x, int y) {
1063                 float v[4], w[4];
1064                 init0f(v, s, x / 4, y);
1065                 util_format_read_4f(surface_fmts[i], w, 0,
1066                                     v, 0, 0, 0, 1, 1);
1067                 *(float *)p = w[x % 4];
1068         }
1069         void expecti(void *p, int s, int x, int y) {
1070                 int32_t v[4], w[4];
1071                 init0i(v, s, x / 4, y);
1072                 util_format_read_4i(surface_fmts[i], w, 0,
1073                                     v, 0, 0, 0, 1, 1);
1074                 *(uint32_t *)p = w[x % 4];
1075         }
1076 
1077         printf("- %s\n", __func__);
1078 
1079         init_prog(ctx, 0, 0, 0, src, NULL);
1080 
1081         for (i = 0; i < Elements(surface_fmts); i++) {
1082                 bool is_int = util_format_is_pure_integer(surface_fmts[i]);
1083 
1084                 printf("   - %s\n", util_format_name(surface_fmts[i]));
1085 
1086                 init_tex(ctx, 0, PIPE_TEXTURE_2D, true, surface_fmts[i],
1087                          128, 32, (is_int ? init0i : init0f));
1088                 init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
1089                          512, 32, init1);
1090                 init_compute_resources(ctx, (int []) { 0, 1, -1 });
1091                 init_sampler_states(ctx, 2);
1092                 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0,
1093                             NULL);
1094                 check_tex(ctx, 1, (is_int ? expecti : expectf), NULL);
1095                 destroy_sampler_states(ctx);
1096                 destroy_compute_resources(ctx);
1097                 destroy_tex(ctx);
1098         }
1099 
1100         destroy_prog(ctx);
1101 }
1102 
test_surface_st(struct context * ctx)1103 static void test_surface_st(struct context *ctx)
1104 {
1105         const char *src = "COMP\n"
1106                 "DCL RES[0], 2D, RAW\n"
1107                 "DCL RES[1], 2D, WR\n"
1108                 "DCL SV[0], BLOCK_ID[0]\n"
1109                 "DCL TEMP[0], LOCAL\n"
1110                 "DCL TEMP[1], LOCAL\n"
1111                 "IMM UINT32 { 16, 1, 0, 0 }\n"
1112                 "\n"
1113                 "    BGNSUB\n"
1114                 "       UMUL TEMP[0], SV[0], IMM[0]\n"
1115                 "       LOAD TEMP[1], RES[0], TEMP[0]\n"
1116                 "       STORE RES[1], SV[0], TEMP[1]\n"
1117                 "       RET\n"
1118                 "    ENDSUB\n";
1119         int i = 0;
1120         void init0f(void *p, int s, int x, int y) {
1121                 float v[] = { 1.0, -.75, 0.5, -.25 };
1122                 *(float *)p = v[x % 4];
1123         }
1124         void init0i(void *p, int s, int x, int y) {
1125                 int v[] = { 0xffffffff, 0xffff, 0xff, 0xf };
1126                 *(int32_t *)p = v[x % 4];
1127         }
1128         void init1(void *p, int s, int x, int y) {
1129                 memset(p, 1, util_format_get_blocksize(surface_fmts[i]));
1130         }
1131         void expectf(void *p, int s, int x, int y) {
1132                 float vf[4];
1133                 int j;
1134 
1135                 for (j = 0; j < 4; j++)
1136                         init0f(&vf[j], s, 4 * x + j, y);
1137                 util_format_write_4f(surface_fmts[i], vf, 0,
1138                                      p, 0, 0, 0, 1, 1);
1139         }
1140         void expects(void *p, int s, int x, int y) {
1141                 int32_t v[4];
1142                 int j;
1143 
1144                 for (j = 0; j < 4; j++)
1145                         init0i(&v[j], s, 4 * x + j, y);
1146                 util_format_write_4i(surface_fmts[i], v, 0,
1147                                      p, 0, 0, 0, 1, 1);
1148         }
1149         void expectu(void *p, int s, int x, int y) {
1150                 uint32_t v[4];
1151                 int j;
1152 
1153                 for (j = 0; j < 4; j++)
1154                         init0i(&v[j], s, 4 * x + j, y);
1155                 util_format_write_4ui(surface_fmts[i], v, 0,
1156                                       p, 0, 0, 0, 1, 1);
1157         }
1158         bool check(void *x, void *y, int sz) {
1159                 int j;
1160 
1161                 if (util_format_is_float(surface_fmts[i])) {
1162                         return fabs(*(float *)x - *(float *)y) < 3.92156863e-3;
1163 
1164                 } else if ((sz % 4) == 0) {
1165                         for (j = 0; j < sz / 4; j++)
1166                                 if (abs(((uint32_t *)x)[j] -
1167                                         ((uint32_t *)y)[j]) > 1)
1168                                         return false;
1169                         return true;
1170                 } else {
1171                         return !memcmp(x, y, sz);
1172                 }
1173         }
1174 
1175         printf("- %s\n", __func__);
1176 
1177         init_prog(ctx, 0, 0, 0, src, NULL);
1178 
1179         for (i = 0; i < Elements(surface_fmts); i++) {
1180                 bool is_signed = (util_format_description(surface_fmts[i])
1181                                   ->channel[0].type == UTIL_FORMAT_TYPE_SIGNED);
1182                 bool is_int = util_format_is_pure_integer(surface_fmts[i]);
1183 
1184                 printf("   - %s\n", util_format_name(surface_fmts[i]));
1185 
1186                 init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
1187                          512, 32, (is_int ? init0i : init0f));
1188                 init_tex(ctx, 1, PIPE_TEXTURE_2D, true, surface_fmts[i],
1189                          128, 32, init1);
1190                 init_compute_resources(ctx, (int []) { 0, 1, -1 });
1191                 init_sampler_states(ctx, 2);
1192                 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0,
1193                             NULL);
1194                 check_tex(ctx, 1, (is_int && is_signed ? expects :
1195                                    is_int && !is_signed ? expectu :
1196                                    expectf), check);
1197                 destroy_sampler_states(ctx);
1198                 destroy_compute_resources(ctx);
1199                 destroy_tex(ctx);
1200         }
1201 
1202         destroy_prog(ctx);
1203 }
1204 
test_barrier(struct context * ctx)1205 static void test_barrier(struct context *ctx)
1206 {
1207         const char *src = "COMP\n"
1208                 "DCL RES[0], BUFFER, RAW, WR\n"
1209                 "DCL SV[0], BLOCK_ID[0]\n"
1210                 "DCL SV[1], BLOCK_SIZE[0]\n"
1211                 "DCL SV[2], THREAD_ID[0]\n"
1212                 "DCL TEMP[0], LOCAL\n"
1213                 "DCL TEMP[1], LOCAL\n"
1214                 "DCL TEMP[2], LOCAL\n"
1215                 "DCL TEMP[3], LOCAL\n"
1216                 "IMM UINT32 { 1, 0, 0, 0 }\n"
1217                 "IMM UINT32 { 4, 0, 0, 0 }\n"
1218                 "IMM UINT32 { 32, 0, 0, 0 }\n"
1219                 "\n"
1220                 "    BGNSUB\n"
1221                 "       UMUL TEMP[0].x, SV[2], IMM[1]\n"
1222                 "       MOV TEMP[1].x, IMM[0].wwww\n"
1223                 "       BGNLOOP\n"
1224                 "               BARRIER\n"
1225                 "               STORE RLOCAL.x, TEMP[0], TEMP[1]\n"
1226                 "               BARRIER\n"
1227                 "               MOV TEMP[2].x, IMM[0].wwww\n"
1228                 "               BGNLOOP\n"
1229                 "                       UMUL TEMP[3].x, TEMP[2], IMM[1]\n"
1230                 "                       LOAD TEMP[3].x, RLOCAL, TEMP[3]\n"
1231                 "                       USNE TEMP[3].x, TEMP[3], TEMP[1]\n"
1232                 "                       IF TEMP[3]\n"
1233                 "                               END\n"
1234                 "                       ENDIF\n"
1235                 "                       UADD TEMP[2].x, TEMP[2], IMM[0]\n"
1236                 "                       USEQ TEMP[3].x, TEMP[2], SV[1]\n"
1237                 "                       IF TEMP[3]\n"
1238                 "                               BRK\n"
1239                 "                       ENDIF\n"
1240                 "               ENDLOOP\n"
1241                 "               UADD TEMP[1].x, TEMP[1], IMM[0]\n"
1242                 "               USEQ TEMP[2].x, TEMP[1], IMM[2]\n"
1243                 "               IF TEMP[2]\n"
1244                 "                       BRK\n"
1245                 "               ENDIF\n"
1246                 "       ENDLOOP\n"
1247                 "       UMUL TEMP[1].x, SV[0], SV[1]\n"
1248                 "       UMUL TEMP[1].x, TEMP[1], IMM[1]\n"
1249                 "       UADD TEMP[1].x, TEMP[1], TEMP[0]\n"
1250                 "       LOAD TEMP[0].x, RLOCAL, TEMP[0]\n"
1251                 "       STORE RES[0].x, TEMP[1], TEMP[0]\n"
1252                 "       RET\n"
1253                 "    ENDSUB\n";
1254         void init(void *p, int s, int x, int y) {
1255                 *(uint32_t *)p = 0xdeadbeef;
1256         }
1257         void expect(void *p, int s, int x, int y) {
1258                 *(uint32_t *)p = 31;
1259         }
1260 
1261         printf("- %s\n", __func__);
1262 
1263         init_prog(ctx, 256, 0, 0, src, NULL);
1264         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
1265                  4096, 0, init);
1266         init_compute_resources(ctx, (int []) { 0, -1 });
1267         launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
1268         check_tex(ctx, 0, expect, NULL);
1269         destroy_compute_resources(ctx);
1270         destroy_tex(ctx);
1271         destroy_prog(ctx);
1272 }
1273 
test_atom_ops(struct context * ctx,bool global)1274 static void test_atom_ops(struct context *ctx, bool global)
1275 {
1276         const char *src = "COMP\n"
1277                 "#ifdef TARGET_GLOBAL\n"
1278                 "#define target RES[0]\n"
1279                 "#else\n"
1280                 "#define target RLOCAL\n"
1281                 "#endif\n"
1282                 ""
1283                 "DCL RES[0], BUFFER, RAW, WR\n"
1284                 "#define threadid SV[0]\n"
1285                 "DCL threadid, THREAD_ID[0]\n"
1286                 ""
1287                 "#define offset TEMP[0]\n"
1288                 "DCL offset, LOCAL\n"
1289                 "#define tmp TEMP[1]\n"
1290                 "DCL tmp, LOCAL\n"
1291                 ""
1292                 "#define k0 IMM[0]\n"
1293                 "IMM UINT32 { 0, 0, 0, 0 }\n"
1294                 "#define k1 IMM[1]\n"
1295                 "IMM UINT32 { 1, 0, 0, 0 }\n"
1296                 "#define k2 IMM[2]\n"
1297                 "IMM UINT32 { 2, 0, 0, 0 }\n"
1298                 "#define k3 IMM[3]\n"
1299                 "IMM UINT32 { 3, 0, 0, 0 }\n"
1300                 "#define k4 IMM[4]\n"
1301                 "IMM UINT32 { 4, 0, 0, 0 }\n"
1302                 "#define k5 IMM[5]\n"
1303                 "IMM UINT32 { 5, 0, 0, 0 }\n"
1304                 "#define k6 IMM[6]\n"
1305                 "IMM UINT32 { 6, 0, 0, 0 }\n"
1306                 "#define k7 IMM[7]\n"
1307                 "IMM UINT32 { 7, 0, 0, 0 }\n"
1308                 "#define k8 IMM[8]\n"
1309                 "IMM UINT32 { 8, 0, 0, 0 }\n"
1310                 "#define k9 IMM[9]\n"
1311                 "IMM UINT32 { 9, 0, 0, 0 }\n"
1312                 "#define korig IMM[10].xxxx\n"
1313                 "#define karg IMM[10].yyyy\n"
1314                 "IMM UINT32 { 3735928559, 286331153, 0, 0 }\n"
1315                 "\n"
1316                 "    BGNSUB\n"
1317                 "       UMUL offset.x, threadid, k4\n"
1318                 "       STORE target.x, offset, korig\n"
1319                 "       USEQ tmp.x, threadid, k0\n"
1320                 "       IF tmp\n"
1321                 "               ATOMUADD tmp.x, target, offset, karg\n"
1322                 "               ATOMUADD tmp.x, target, offset, tmp\n"
1323                 "       ENDIF\n"
1324                 "       USEQ tmp.x, threadid, k1\n"
1325                 "       IF tmp\n"
1326                 "               ATOMXCHG tmp.x, target, offset, karg\n"
1327                 "               ATOMXCHG tmp.x, target, offset, tmp\n"
1328                 "       ENDIF\n"
1329                 "       USEQ tmp.x, threadid, k2\n"
1330                 "       IF tmp\n"
1331                 "               ATOMCAS tmp.x, target, offset, korig, karg\n"
1332                 "               ATOMCAS tmp.x, target, offset, tmp, k0\n"
1333                 "       ENDIF\n"
1334                 "       USEQ tmp.x, threadid, k3\n"
1335                 "       IF tmp\n"
1336                 "               ATOMAND tmp.x, target, offset, karg\n"
1337                 "               ATOMAND tmp.x, target, offset, tmp\n"
1338                 "       ENDIF\n"
1339                 "       USEQ tmp.x, threadid, k4\n"
1340                 "       IF tmp\n"
1341                 "               ATOMOR tmp.x, target, offset, karg\n"
1342                 "               ATOMOR tmp.x, target, offset, tmp\n"
1343                 "       ENDIF\n"
1344                 "       USEQ tmp.x, threadid, k5\n"
1345                 "       IF tmp\n"
1346                 "               ATOMXOR tmp.x, target, offset, karg\n"
1347                 "               ATOMXOR tmp.x, target, offset, tmp\n"
1348                 "       ENDIF\n"
1349                 "       USEQ tmp.x, threadid, k6\n"
1350                 "       IF tmp\n"
1351                 "               ATOMUMIN tmp.x, target, offset, karg\n"
1352                 "               ATOMUMIN tmp.x, target, offset, tmp\n"
1353                 "       ENDIF\n"
1354                 "       USEQ tmp.x, threadid, k7\n"
1355                 "       IF tmp\n"
1356                 "               ATOMUMAX tmp.x, target, offset, karg\n"
1357                 "               ATOMUMAX tmp.x, target, offset, tmp\n"
1358                 "       ENDIF\n"
1359                 "       USEQ tmp.x, threadid, k8\n"
1360                 "       IF tmp\n"
1361                 "               ATOMIMIN tmp.x, target, offset, karg\n"
1362                 "               ATOMIMIN tmp.x, target, offset, tmp\n"
1363                 "       ENDIF\n"
1364                 "       USEQ tmp.x, threadid, k9\n"
1365                 "       IF tmp\n"
1366                 "               ATOMIMAX tmp.x, target, offset, karg\n"
1367                 "               ATOMIMAX tmp.x, target, offset, tmp\n"
1368                 "       ENDIF\n"
1369                 "#ifdef TARGET_LOCAL\n"
1370                 "       LOAD tmp.x, RLOCAL, offset\n"
1371                 "       STORE RES[0].x, offset, tmp\n"
1372                 "#endif\n"
1373                 "       RET\n"
1374                 "    ENDSUB\n";
1375 
1376         void init(void *p, int s, int x, int y) {
1377                 *(uint32_t *)p = 0xbad;
1378         }
1379         void expect(void *p, int s, int x, int y) {
1380                 switch (x) {
1381                 case 0:
1382                         *(uint32_t *)p = 0xce6c8eef;
1383                         break;
1384                 case 1:
1385                         *(uint32_t *)p = 0xdeadbeef;
1386                         break;
1387                 case 2:
1388                         *(uint32_t *)p = 0x11111111;
1389                         break;
1390                 case 3:
1391                         *(uint32_t *)p = 0x10011001;
1392                         break;
1393                 case 4:
1394                         *(uint32_t *)p = 0xdfbdbfff;
1395                         break;
1396                 case 5:
1397                         *(uint32_t *)p = 0x11111111;
1398                         break;
1399                 case 6:
1400                         *(uint32_t *)p = 0x11111111;
1401                         break;
1402                 case 7:
1403                         *(uint32_t *)p = 0xdeadbeef;
1404                         break;
1405                 case 8:
1406                         *(uint32_t *)p = 0xdeadbeef;
1407                         break;
1408                 case 9:
1409                         *(uint32_t *)p = 0x11111111;
1410                         break;
1411                 }
1412         }
1413 
1414         printf("- %s (%s)\n", __func__, global ? "global" : "local");
1415 
1416         init_prog(ctx, 40, 0, 0, src,
1417                   (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL"));
1418         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
1419                  40, 0, init);
1420         init_compute_resources(ctx, (int []) { 0, -1 });
1421         launch_grid(ctx, (uint []){10, 1, 1}, (uint []){1, 1, 1}, 0, NULL);
1422         check_tex(ctx, 0, expect, NULL);
1423         destroy_compute_resources(ctx);
1424         destroy_tex(ctx);
1425         destroy_prog(ctx);
1426 }
1427 
test_atom_race(struct context * ctx,bool global)1428 static void test_atom_race(struct context *ctx, bool global)
1429 {
1430         const char *src = "COMP\n"
1431                 "#ifdef TARGET_GLOBAL\n"
1432                 "#define target RES[0]\n"
1433                 "#else\n"
1434                 "#define target RLOCAL\n"
1435                 "#endif\n"
1436                 ""
1437                 "DCL RES[0], BUFFER, RAW, WR\n"
1438                 ""
1439                 "#define blockid SV[0]\n"
1440                 "DCL blockid, BLOCK_ID[0]\n"
1441                 "#define blocksz SV[1]\n"
1442                 "DCL blocksz, BLOCK_SIZE[0]\n"
1443                 "#define threadid SV[2]\n"
1444                 "DCL threadid, THREAD_ID[0]\n"
1445                 ""
1446                 "#define offset TEMP[0]\n"
1447                 "DCL offset, LOCAL\n"
1448                 "#define arg TEMP[1]\n"
1449                 "DCL arg, LOCAL\n"
1450                 "#define count TEMP[2]\n"
1451                 "DCL count, LOCAL\n"
1452                 "#define vlocal TEMP[3]\n"
1453                 "DCL vlocal, LOCAL\n"
1454                 "#define vshared TEMP[4]\n"
1455                 "DCL vshared, LOCAL\n"
1456                 "#define last TEMP[5]\n"
1457                 "DCL last, LOCAL\n"
1458                 "#define tmp0 TEMP[6]\n"
1459                 "DCL tmp0, LOCAL\n"
1460                 "#define tmp1 TEMP[7]\n"
1461                 "DCL tmp1, LOCAL\n"
1462                 ""
1463                 "#define k0 IMM[0]\n"
1464                 "IMM UINT32 { 0, 0, 0, 0 }\n"
1465                 "#define k1 IMM[1]\n"
1466                 "IMM UINT32 { 1, 0, 0, 0 }\n"
1467                 "#define k4 IMM[2]\n"
1468                 "IMM UINT32 { 4, 0, 0, 0 }\n"
1469                 "#define k32 IMM[3]\n"
1470                 "IMM UINT32 { 32, 0, 0, 0 }\n"
1471                 "#define k128 IMM[4]\n"
1472                 "IMM UINT32 { 128, 0, 0, 0 }\n"
1473                 "#define kdeadcafe IMM[5]\n"
1474                 "IMM UINT32 { 3735931646, 0, 0, 0 }\n"
1475                 "#define kallowed_set IMM[6]\n"
1476                 "IMM UINT32 { 559035650, 0, 0, 0 }\n"
1477                 "#define k11111111 IMM[7]\n"
1478                 "IMM UINT32 { 286331153, 0, 0, 0 }\n"
1479                 "\n"
1480                 "    BGNSUB\n"
1481                 "       MOV offset.x, threadid\n"
1482                 "#ifdef TARGET_GLOBAL\n"
1483                 "       UMUL tmp0.x, blockid, blocksz\n"
1484                 "       UADD offset.x, offset, tmp0\n"
1485                 "#endif\n"
1486                 "       UMUL offset.x, offset, k4\n"
1487                 "       USLT tmp0.x, threadid, k32\n"
1488                 "       STORE target.x, offset, k0\n"
1489                 "       BARRIER\n"
1490                 "       IF tmp0\n"
1491                 "               MOV vlocal.x, k0\n"
1492                 "               MOV arg.x, kdeadcafe\n"
1493                 "               BGNLOOP\n"
1494                 "                       INEG arg.x, arg\n"
1495                 "                       ATOMUADD vshared.x, target, offset, arg\n"
1496                 "                       SFENCE target\n"
1497                 "                       USNE tmp0.x, vshared, vlocal\n"
1498                 "                       IF tmp0\n"
1499                 "                               BRK\n"
1500                 "                       ENDIF\n"
1501                 "                       UADD vlocal.x, vlocal, arg\n"
1502                 "               ENDLOOP\n"
1503                 "               UADD vlocal.x, vshared, arg\n"
1504                 "               LOAD vshared.x, target, offset\n"
1505                 "               USEQ tmp0.x, vshared, vlocal\n"
1506                 "               STORE target.x, offset, tmp0\n"
1507                 "       ELSE\n"
1508                 "               UADD offset.x, offset, -k128\n"
1509                 "               MOV count.x, k0\n"
1510                 "               MOV last.x, k0\n"
1511                 "               BGNLOOP\n"
1512                 "                       LOAD vshared.x, target, offset\n"
1513                 "                       USEQ tmp0.x, vshared, kallowed_set.xxxx\n"
1514                 "                       USEQ tmp1.x, vshared, kallowed_set.yyyy\n"
1515                 "                       OR tmp0.x, tmp0, tmp1\n"
1516                 "                       IF tmp0\n"
1517                 "                               USEQ tmp0.x, vshared, last\n"
1518                 "                               IF tmp0\n"
1519                 "                                       CONT\n"
1520                 "                               ENDIF\n"
1521                 "                               MOV last.x, vshared\n"
1522                 "                       ELSE\n"
1523                 "                               END\n"
1524                 "                       ENDIF\n"
1525                 "                       UADD count.x, count, k1\n"
1526                 "                       USEQ tmp0.x, count, k128\n"
1527                 "                       IF tmp0\n"
1528                 "                               BRK\n"
1529                 "                       ENDIF\n"
1530                 "               ENDLOOP\n"
1531                 "               ATOMXCHG tmp0.x, target, offset, k11111111\n"
1532                 "               UADD offset.x, offset, k128\n"
1533                 "               ATOMXCHG tmp0.x, target, offset, k11111111\n"
1534                 "               SFENCE target\n"
1535                 "       ENDIF\n"
1536                 "#ifdef TARGET_LOCAL\n"
1537                 "       LOAD tmp0.x, RLOCAL, offset\n"
1538                 "       UMUL tmp1.x, blockid, blocksz\n"
1539                 "       UMUL tmp1.x, tmp1, k4\n"
1540                 "       UADD offset.x, offset, tmp1\n"
1541                 "       STORE RES[0].x, offset, tmp0\n"
1542                 "#endif\n"
1543                 "       RET\n"
1544                 "    ENDSUB\n";
1545 
1546         void init(void *p, int s, int x, int y) {
1547                 *(uint32_t *)p = 0xdeadbeef;
1548         }
1549         void expect(void *p, int s, int x, int y) {
1550                 *(uint32_t *)p = x & 0x20 ? 0x11111111 : 0xffffffff;
1551         }
1552 
1553         printf("- %s (%s)\n", __func__, global ? "global" : "local");
1554 
1555         init_prog(ctx, 256, 0, 0, src,
1556                   (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL"));
1557         init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
1558                  4096, 0, init);
1559         init_compute_resources(ctx, (int []) { 0, -1 });
1560         launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
1561         check_tex(ctx, 0, expect, NULL);
1562         destroy_compute_resources(ctx);
1563         destroy_tex(ctx);
1564         destroy_prog(ctx);
1565 }
1566 
main(int argc,char * argv[])1567 int main(int argc, char *argv[])
1568 {
1569         struct context *ctx = CALLOC_STRUCT(context);
1570 
1571         init_ctx(ctx);
1572         test_system_values(ctx);
1573         test_resource_access(ctx);
1574         test_function_calls(ctx);
1575         test_input_global(ctx);
1576         test_private(ctx);
1577         test_local(ctx);
1578         test_sample(ctx);
1579         test_many_kern(ctx);
1580         test_constant(ctx);
1581         test_resource_indirect(ctx);
1582         test_surface_ld(ctx);
1583         test_surface_st(ctx);
1584         test_barrier(ctx);
1585         test_atom_ops(ctx, true);
1586         test_atom_race(ctx, true);
1587         test_atom_ops(ctx, false);
1588         test_atom_race(ctx, false);
1589         destroy_ctx(ctx);
1590 
1591         return 0;
1592 }
1593