1 /**************************************************************************
2  *
3  * Copyright 2009 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 
29 /**
30  * @file
31  * Unit tests for blend LLVM IR generation
32  *
33  * @author Jose Fonseca <jfonseca@vmware.com>
34  *
35  * Blend computation code derived from code written by
36  * @author Brian Paul <brian@vmware.com>
37  */
38 
39 #include "util/u_memory.h"
40 
41 #include "gallivm/lp_bld_init.h"
42 #include "gallivm/lp_bld_type.h"
43 #include "gallivm/lp_bld_debug.h"
44 #include "lp_bld_blend.h"
45 #include "lp_test.h"
46 
47 
/**
 * Data layout exercised by a generated blend test.
 *
 * The numeric values matter: test_all() iterates over them as 0 and 1, and
 * test_some() picks one with "rand() & 1".
 */
enum vector_mode
{
   /* Array of structures: one vector per operand, blended with
    * lp_build_blend_aos(). */
   AoS = 0,

   /* Structure of arrays: four vectors (r, g, b, a) per operand, blended
    * with lp_build_blend_soa(). */
   SoA = 1
};
53 
54 
/**
 * Signature of the JIT-compiled function built by add_blend_test().
 *
 * All four arguments point at vector data: the source colour, the
 * destination colour and the constant blend colour are read, and the
 * blended result is written to @p res.
 */
typedef void
(*blend_test_ptr_t)(const void *src,
                    const void *dst,
                    const void *con,
                    void *res);
56 
57 
/**
 * Write the column-name line of the tab-separated results file.
 *
 * The columns are listed in the same order in which write_tsv_row() prints
 * its values.  The stream is flushed before returning.
 *
 * @param fp  stream to write to
 */
void
write_tsv_header(FILE *fp)
{
   static const char header_line[] =
      "result\t"
      "cycles_per_channel\t"
      "mode\t"
      "type\t"
      "sep_func\t"
      "sep_src_factor\t"
      "sep_dst_factor\t"
      "rgb_func\t"
      "rgb_src_factor\t"
      "rgb_dst_factor\t"
      "alpha_func\t"
      "alpha_src_factor\t"
      "alpha_dst_factor\n";

   fputs(header_line, fp);
   fflush(fp);
}
78 
79 
80 static void
write_tsv_row(FILE * fp,const struct pipe_blend_state * blend,enum vector_mode mode,struct lp_type type,double cycles,boolean success)81 write_tsv_row(FILE *fp,
82               const struct pipe_blend_state *blend,
83               enum vector_mode mode,
84               struct lp_type type,
85               double cycles,
86               boolean success)
87 {
88    fprintf(fp, "%s\t", success ? "pass" : "fail");
89 
90    if (mode == AoS) {
91       fprintf(fp, "%.1f\t", cycles / type.length);
92       fprintf(fp, "aos\t");
93    }
94 
95    if (mode == SoA) {
96       fprintf(fp, "%.1f\t", cycles / (4 * type.length));
97       fprintf(fp, "soa\t");
98    }
99 
100    fprintf(fp, "%s%u%sx%u\t",
101            type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
102            type.width,
103            type.norm ? "n" : "",
104            type.length);
105 
106    fprintf(fp,
107            "%s\t%s\t%s\t",
108            blend->rt[0].rgb_func != blend->rt[0].alpha_func ? "true" : "false",
109            blend->rt[0].rgb_src_factor != blend->rt[0].alpha_src_factor ? "true" : "false",
110            blend->rt[0].rgb_dst_factor != blend->rt[0].alpha_dst_factor ? "true" : "false");
111 
112    fprintf(fp,
113            "%s\t%s\t%s\t%s\t%s\t%s\n",
114            util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
115            util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
116            util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
117            util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
118            util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
119            util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
120 
121    fflush(fp);
122 }
123 
124 
125 static void
dump_blend_type(FILE * fp,const struct pipe_blend_state * blend,enum vector_mode mode,struct lp_type type)126 dump_blend_type(FILE *fp,
127                 const struct pipe_blend_state *blend,
128                 enum vector_mode mode,
129                 struct lp_type type)
130 {
131    fprintf(fp, "%s", mode ? "soa" : "aos");
132 
133    fprintf(fp, " type=%s%u%sx%u",
134            type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
135            type.width,
136            type.norm ? "n" : "",
137            type.length);
138 
139    fprintf(fp,
140            " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
141            "rgb_func",         util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
142            "rgb_src_factor",   util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
143            "rgb_dst_factor",   util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
144            "alpha_func",       util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
145            "alpha_src_factor", util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
146            "alpha_dst_factor", util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
147 
148    fprintf(fp, " ...\n");
149    fflush(fp);
150 }
151 
152 
153 static LLVMValueRef
add_blend_test(struct gallivm_state * gallivm,const struct pipe_blend_state * blend,enum vector_mode mode,struct lp_type type)154 add_blend_test(struct gallivm_state *gallivm,
155                const struct pipe_blend_state *blend,
156                enum vector_mode mode,
157                struct lp_type type)
158 {
159    LLVMModuleRef module = gallivm->module;
160    LLVMContextRef context = gallivm->context;
161    LLVMTypeRef vec_type;
162    LLVMTypeRef args[4];
163    LLVMValueRef func;
164    LLVMValueRef src_ptr;
165    LLVMValueRef dst_ptr;
166    LLVMValueRef const_ptr;
167    LLVMValueRef res_ptr;
168    LLVMBasicBlockRef block;
169    LLVMBuilderRef builder;
170    const enum pipe_format format = PIPE_FORMAT_R8G8B8A8_UNORM;
171    const unsigned rt = 0;
172    const unsigned char swizzle[4] = { 0, 1, 2, 3 };
173 
174    vec_type = lp_build_vec_type(gallivm, type);
175 
176    args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0);
177    func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidTypeInContext(context), args, 4, 0));
178    LLVMSetFunctionCallConv(func, LLVMCCallConv);
179    src_ptr = LLVMGetParam(func, 0);
180    dst_ptr = LLVMGetParam(func, 1);
181    const_ptr = LLVMGetParam(func, 2);
182    res_ptr = LLVMGetParam(func, 3);
183 
184    block = LLVMAppendBasicBlockInContext(context, func, "entry");
185    builder = gallivm->builder;
186    LLVMPositionBuilderAtEnd(builder, block);
187 
188    if (mode == AoS) {
189       LLVMValueRef src;
190       LLVMValueRef dst;
191       LLVMValueRef con;
192       LLVMValueRef res;
193 
194       src = LLVMBuildLoad(builder, src_ptr, "src");
195       dst = LLVMBuildLoad(builder, dst_ptr, "dst");
196       con = LLVMBuildLoad(builder, const_ptr, "const");
197 
198       res = lp_build_blend_aos(gallivm, blend, &format, type, rt, src, dst, NULL, con, swizzle);
199 
200       lp_build_name(res, "res");
201 
202       LLVMBuildStore(builder, res, res_ptr);
203    }
204 
205    if (mode == SoA) {
206       LLVMValueRef src[4];
207       LLVMValueRef dst[4];
208       LLVMValueRef con[4];
209       LLVMValueRef res[4];
210       unsigned i;
211 
212       for(i = 0; i < 4; ++i) {
213          LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0);
214          src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), "");
215          dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
216          con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
217          lp_build_name(src[i], "src.%c", "rgba"[i]);
218          lp_build_name(con[i], "con.%c", "rgba"[i]);
219          lp_build_name(dst[i], "dst.%c", "rgba"[i]);
220       }
221 
222       lp_build_blend_soa(gallivm, blend, type, rt, src, dst, con, res);
223 
224       for(i = 0; i < 4; ++i) {
225          LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0);
226          lp_build_name(res[i], "res.%c", "rgba"[i]);
227          LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
228       }
229    }
230 
231    LLVMBuildRetVoid(builder);;
232 
233    return func;
234 }
235 
236 
237 static void
compute_blend_ref_term(unsigned rgb_factor,unsigned alpha_factor,const double * factor,const double * src,const double * dst,const double * con,double * term)238 compute_blend_ref_term(unsigned rgb_factor,
239                        unsigned alpha_factor,
240                        const double *factor,
241                        const double *src,
242                        const double *dst,
243                        const double *con,
244                        double *term)
245 {
246    double temp;
247 
248    switch (rgb_factor) {
249    case PIPE_BLENDFACTOR_ONE:
250       term[0] = factor[0]; /* R */
251       term[1] = factor[1]; /* G */
252       term[2] = factor[2]; /* B */
253       break;
254    case PIPE_BLENDFACTOR_SRC_COLOR:
255       term[0] = factor[0] * src[0]; /* R */
256       term[1] = factor[1] * src[1]; /* G */
257       term[2] = factor[2] * src[2]; /* B */
258       break;
259    case PIPE_BLENDFACTOR_SRC_ALPHA:
260       term[0] = factor[0] * src[3]; /* R */
261       term[1] = factor[1] * src[3]; /* G */
262       term[2] = factor[2] * src[3]; /* B */
263       break;
264    case PIPE_BLENDFACTOR_DST_COLOR:
265       term[0] = factor[0] * dst[0]; /* R */
266       term[1] = factor[1] * dst[1]; /* G */
267       term[2] = factor[2] * dst[2]; /* B */
268       break;
269    case PIPE_BLENDFACTOR_DST_ALPHA:
270       term[0] = factor[0] * dst[3]; /* R */
271       term[1] = factor[1] * dst[3]; /* G */
272       term[2] = factor[2] * dst[3]; /* B */
273       break;
274    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
275       temp = MIN2(src[3], 1.0f - dst[3]);
276       term[0] = factor[0] * temp; /* R */
277       term[1] = factor[1] * temp; /* G */
278       term[2] = factor[2] * temp; /* B */
279       break;
280    case PIPE_BLENDFACTOR_CONST_COLOR:
281       term[0] = factor[0] * con[0]; /* R */
282       term[1] = factor[1] * con[1]; /* G */
283       term[2] = factor[2] * con[2]; /* B */
284       break;
285    case PIPE_BLENDFACTOR_CONST_ALPHA:
286       term[0] = factor[0] * con[3]; /* R */
287       term[1] = factor[1] * con[3]; /* G */
288       term[2] = factor[2] * con[3]; /* B */
289       break;
290    case PIPE_BLENDFACTOR_SRC1_COLOR:
291       assert(0); /* to do */
292       break;
293    case PIPE_BLENDFACTOR_SRC1_ALPHA:
294       assert(0); /* to do */
295       break;
296    case PIPE_BLENDFACTOR_ZERO:
297       term[0] = 0.0f; /* R */
298       term[1] = 0.0f; /* G */
299       term[2] = 0.0f; /* B */
300       break;
301    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
302       term[0] = factor[0] * (1.0f - src[0]); /* R */
303       term[1] = factor[1] * (1.0f - src[1]); /* G */
304       term[2] = factor[2] * (1.0f - src[2]); /* B */
305       break;
306    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
307       term[0] = factor[0] * (1.0f - src[3]); /* R */
308       term[1] = factor[1] * (1.0f - src[3]); /* G */
309       term[2] = factor[2] * (1.0f - src[3]); /* B */
310       break;
311    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
312       term[0] = factor[0] * (1.0f - dst[3]); /* R */
313       term[1] = factor[1] * (1.0f - dst[3]); /* G */
314       term[2] = factor[2] * (1.0f - dst[3]); /* B */
315       break;
316    case PIPE_BLENDFACTOR_INV_DST_COLOR:
317       term[0] = factor[0] * (1.0f - dst[0]); /* R */
318       term[1] = factor[1] * (1.0f - dst[1]); /* G */
319       term[2] = factor[2] * (1.0f - dst[2]); /* B */
320       break;
321    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
322       term[0] = factor[0] * (1.0f - con[0]); /* R */
323       term[1] = factor[1] * (1.0f - con[1]); /* G */
324       term[2] = factor[2] * (1.0f - con[2]); /* B */
325       break;
326    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
327       term[0] = factor[0] * (1.0f - con[3]); /* R */
328       term[1] = factor[1] * (1.0f - con[3]); /* G */
329       term[2] = factor[2] * (1.0f - con[3]); /* B */
330       break;
331    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
332       assert(0); /* to do */
333       break;
334    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
335       assert(0); /* to do */
336       break;
337    default:
338       assert(0);
339    }
340 
341    /*
342     * Compute src/first term A
343     */
344    switch (alpha_factor) {
345    case PIPE_BLENDFACTOR_ONE:
346       term[3] = factor[3]; /* A */
347       break;
348    case PIPE_BLENDFACTOR_SRC_COLOR:
349    case PIPE_BLENDFACTOR_SRC_ALPHA:
350       term[3] = factor[3] * src[3]; /* A */
351       break;
352    case PIPE_BLENDFACTOR_DST_COLOR:
353    case PIPE_BLENDFACTOR_DST_ALPHA:
354       term[3] = factor[3] * dst[3]; /* A */
355       break;
356    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
357       term[3] = src[3]; /* A */
358       break;
359    case PIPE_BLENDFACTOR_CONST_COLOR:
360    case PIPE_BLENDFACTOR_CONST_ALPHA:
361       term[3] = factor[3] * con[3]; /* A */
362       break;
363    case PIPE_BLENDFACTOR_ZERO:
364       term[3] = 0.0f; /* A */
365       break;
366    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
367    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
368       term[3] = factor[3] * (1.0f - src[3]); /* A */
369       break;
370    case PIPE_BLENDFACTOR_INV_DST_COLOR:
371    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
372       term[3] = factor[3] * (1.0f - dst[3]); /* A */
373       break;
374    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
375    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
376       term[3] = factor[3] * (1.0f - con[3]);
377       break;
378    default:
379       assert(0);
380    }
381 }
382 
383 
384 static void
compute_blend_ref(const struct pipe_blend_state * blend,const double * src,const double * dst,const double * con,double * res)385 compute_blend_ref(const struct pipe_blend_state *blend,
386                   const double *src,
387                   const double *dst,
388                   const double *con,
389                   double *res)
390 {
391    double src_term[4];
392    double dst_term[4];
393 
394    compute_blend_ref_term(blend->rt[0].rgb_src_factor, blend->rt[0].alpha_src_factor,
395                           src, src, dst, con, src_term);
396    compute_blend_ref_term(blend->rt[0].rgb_dst_factor, blend->rt[0].alpha_dst_factor,
397                           dst, src, dst, con, dst_term);
398 
399    /*
400     * Combine RGB terms
401     */
402    switch (blend->rt[0].rgb_func) {
403    case PIPE_BLEND_ADD:
404       res[0] = src_term[0] + dst_term[0]; /* R */
405       res[1] = src_term[1] + dst_term[1]; /* G */
406       res[2] = src_term[2] + dst_term[2]; /* B */
407       break;
408    case PIPE_BLEND_SUBTRACT:
409       res[0] = src_term[0] - dst_term[0]; /* R */
410       res[1] = src_term[1] - dst_term[1]; /* G */
411       res[2] = src_term[2] - dst_term[2]; /* B */
412       break;
413    case PIPE_BLEND_REVERSE_SUBTRACT:
414       res[0] = dst_term[0] - src_term[0]; /* R */
415       res[1] = dst_term[1] - src_term[1]; /* G */
416       res[2] = dst_term[2] - src_term[2]; /* B */
417       break;
418    case PIPE_BLEND_MIN:
419       res[0] = MIN2(src_term[0], dst_term[0]); /* R */
420       res[1] = MIN2(src_term[1], dst_term[1]); /* G */
421       res[2] = MIN2(src_term[2], dst_term[2]); /* B */
422       break;
423    case PIPE_BLEND_MAX:
424       res[0] = MAX2(src_term[0], dst_term[0]); /* R */
425       res[1] = MAX2(src_term[1], dst_term[1]); /* G */
426       res[2] = MAX2(src_term[2], dst_term[2]); /* B */
427       break;
428    default:
429       assert(0);
430    }
431 
432    /*
433     * Combine A terms
434     */
435    switch (blend->rt[0].alpha_func) {
436    case PIPE_BLEND_ADD:
437       res[3] = src_term[3] + dst_term[3]; /* A */
438       break;
439    case PIPE_BLEND_SUBTRACT:
440       res[3] = src_term[3] - dst_term[3]; /* A */
441       break;
442    case PIPE_BLEND_REVERSE_SUBTRACT:
443       res[3] = dst_term[3] - src_term[3]; /* A */
444       break;
445    case PIPE_BLEND_MIN:
446       res[3] = MIN2(src_term[3], dst_term[3]); /* A */
447       break;
448    case PIPE_BLEND_MAX:
449       res[3] = MAX2(src_term[3], dst_term[3]); /* A */
450       break;
451    default:
452       assert(0);
453    }
454 }
455 
456 
457 PIPE_ALIGN_STACK
458 static boolean
test_one(unsigned verbose,FILE * fp,const struct pipe_blend_state * blend,enum vector_mode mode,struct lp_type type)459 test_one(unsigned verbose,
460          FILE *fp,
461          const struct pipe_blend_state *blend,
462          enum vector_mode mode,
463          struct lp_type type)
464 {
465    struct gallivm_state *gallivm;
466    LLVMValueRef func = NULL;
467    blend_test_ptr_t blend_test_ptr;
468    boolean success;
469    const unsigned n = LP_TEST_NUM_SAMPLES;
470    int64_t cycles[LP_TEST_NUM_SAMPLES];
471    double cycles_avg = 0.0;
472    unsigned i, j;
473    const unsigned stride = lp_type_width(type)/8;
474 
475    if(verbose >= 1)
476       dump_blend_type(stdout, blend, mode, type);
477 
478    gallivm = gallivm_create();
479 
480    func = add_blend_test(gallivm, blend, mode, type);
481 
482    gallivm_compile_module(gallivm);
483 
484    blend_test_ptr = (blend_test_ptr_t)gallivm_jit_function(gallivm, func);
485 
486    success = TRUE;
487    if(mode == AoS) {
488       uint8_t *src, *dst, *con, *res, *ref;
489       src = align_malloc(stride, stride);
490       dst = align_malloc(stride, stride);
491       con = align_malloc(stride, stride);
492       res = align_malloc(stride, stride);
493       ref = align_malloc(stride, stride);
494 
495       for(i = 0; i < n && success; ++i) {
496          int64_t start_counter = 0;
497          int64_t end_counter = 0;
498 
499          random_vec(type, src);
500          random_vec(type, dst);
501          random_vec(type, con);
502 
503          {
504             double fsrc[LP_MAX_VECTOR_LENGTH];
505             double fdst[LP_MAX_VECTOR_LENGTH];
506             double fcon[LP_MAX_VECTOR_LENGTH];
507             double fref[LP_MAX_VECTOR_LENGTH];
508 
509             read_vec(type, src, fsrc);
510             read_vec(type, dst, fdst);
511             read_vec(type, con, fcon);
512 
513             for(j = 0; j < type.length; j += 4)
514                compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
515 
516             write_vec(type, ref, fref);
517          }
518 
519          start_counter = rdtsc();
520          blend_test_ptr(src, dst, con, res);
521          end_counter = rdtsc();
522 
523          cycles[i] = end_counter - start_counter;
524 
525          if(!compare_vec(type, res, ref)) {
526             success = FALSE;
527 
528             if(verbose < 1)
529                dump_blend_type(stderr, blend, mode, type);
530             fprintf(stderr, "MISMATCH\n");
531 
532             fprintf(stderr, "  Src: ");
533             dump_vec(stderr, type, src);
534             fprintf(stderr, "\n");
535 
536             fprintf(stderr, "  Dst: ");
537             dump_vec(stderr, type, dst);
538             fprintf(stderr, "\n");
539 
540             fprintf(stderr, "  Con: ");
541             dump_vec(stderr, type, con);
542             fprintf(stderr, "\n");
543 
544             fprintf(stderr, "  Res: ");
545             dump_vec(stderr, type, res);
546             fprintf(stderr, "\n");
547 
548             fprintf(stderr, "  Ref: ");
549             dump_vec(stderr, type, ref);
550             fprintf(stderr, "\n");
551          }
552       }
553       align_free(src);
554       align_free(dst);
555       align_free(con);
556       align_free(res);
557       align_free(ref);
558    }
559    else if(mode == SoA) {
560       uint8_t *src, *dst, *con, *res, *ref;
561       src = align_malloc(4*stride, stride);
562       dst = align_malloc(4*stride, stride);
563       con = align_malloc(4*stride, stride);
564       res = align_malloc(4*stride, stride);
565       ref = align_malloc(4*stride, stride);
566 
567       for(i = 0; i < n && success; ++i) {
568          int64_t start_counter = 0;
569          int64_t end_counter = 0;
570          boolean mismatch;
571 
572          for(j = 0; j < 4; ++j) {
573             random_vec(type, src + j*stride);
574             random_vec(type, dst + j*stride);
575             random_vec(type, con + j*stride);
576          }
577 
578          {
579             double fsrc[4];
580             double fdst[4];
581             double fcon[4];
582             double fref[4];
583             unsigned k;
584 
585             for(k = 0; k < type.length; ++k) {
586                for(j = 0; j < 4; ++j) {
587                   fsrc[j] = read_elem(type, src + j*stride, k);
588                   fdst[j] = read_elem(type, dst + j*stride, k);
589                   fcon[j] = read_elem(type, con + j*stride, k);
590                }
591 
592                compute_blend_ref(blend, fsrc, fdst, fcon, fref);
593 
594                for(j = 0; j < 4; ++j)
595                   write_elem(type, ref + j*stride, k, fref[j]);
596             }
597          }
598 
599          start_counter = rdtsc();
600          blend_test_ptr(src, dst, con, res);
601          end_counter = rdtsc();
602 
603          cycles[i] = end_counter - start_counter;
604 
605          mismatch = FALSE;
606          for (j = 0; j < 4; ++j)
607             if(!compare_vec(type, res + j*stride, ref + j*stride))
608                mismatch = TRUE;
609 
610          if (mismatch) {
611             success = FALSE;
612 
613             if(verbose < 1)
614                dump_blend_type(stderr, blend, mode, type);
615             fprintf(stderr, "MISMATCH\n");
616             for(j = 0; j < 4; ++j) {
617                char channel = "RGBA"[j];
618                fprintf(stderr, "  Src%c: ", channel);
619                dump_vec(stderr, type, src + j*stride);
620                fprintf(stderr, "\n");
621 
622                fprintf(stderr, "  Dst%c: ", channel);
623                dump_vec(stderr, type, dst + j*stride);
624                fprintf(stderr, "\n");
625 
626                fprintf(stderr, "  Con%c: ", channel);
627                dump_vec(stderr, type, con + j*stride);
628                fprintf(stderr, "\n");
629 
630                fprintf(stderr, "  Res%c: ", channel);
631                dump_vec(stderr, type, res + j*stride);
632                fprintf(stderr, "\n");
633 
634                fprintf(stderr, "  Ref%c: ", channel);
635                dump_vec(stderr, type, ref + j*stride);
636                fprintf(stderr, "\n");
637 
638                fprintf(stderr, "\n");
639             }
640          }
641       }
642       align_free(src);
643       align_free(dst);
644       align_free(con);
645       align_free(res);
646       align_free(ref);
647    }
648 
649    /*
650     * Unfortunately the output of cycle counter is not very reliable as it comes
651     * -- sometimes we get outliers (due IRQs perhaps?) which are
652     * better removed to avoid random or biased data.
653     */
654    {
655       double sum = 0.0, sum2 = 0.0;
656       double avg, std;
657       unsigned m;
658 
659       for(i = 0; i < n; ++i) {
660          sum += cycles[i];
661          sum2 += cycles[i]*cycles[i];
662       }
663 
664       avg = sum/n;
665       std = sqrtf((sum2 - n*avg*avg)/n);
666 
667       m = 0;
668       sum = 0.0;
669       for(i = 0; i < n; ++i) {
670          if(fabs(cycles[i] - avg) <= 4.0*std) {
671             sum += cycles[i];
672             ++m;
673          }
674       }
675 
676       cycles_avg = sum/m;
677 
678    }
679 
680    if(fp)
681       write_tsv_row(fp, blend, mode, type, cycles_avg, success);
682 
683    gallivm_free_function(gallivm, func, blend_test_ptr);
684 
685    gallivm_destroy(gallivm);
686 
687    return success;
688 }
689 
690 
691 const unsigned
692 blend_factors[] = {
693    PIPE_BLENDFACTOR_ZERO,
694    PIPE_BLENDFACTOR_ONE,
695    PIPE_BLENDFACTOR_SRC_COLOR,
696    PIPE_BLENDFACTOR_SRC_ALPHA,
697    PIPE_BLENDFACTOR_DST_COLOR,
698    PIPE_BLENDFACTOR_DST_ALPHA,
699    PIPE_BLENDFACTOR_CONST_COLOR,
700    PIPE_BLENDFACTOR_CONST_ALPHA,
701 #if 0
702    PIPE_BLENDFACTOR_SRC1_COLOR,
703    PIPE_BLENDFACTOR_SRC1_ALPHA,
704 #endif
705    PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE,
706    PIPE_BLENDFACTOR_INV_SRC_COLOR,
707    PIPE_BLENDFACTOR_INV_SRC_ALPHA,
708    PIPE_BLENDFACTOR_INV_DST_COLOR,
709    PIPE_BLENDFACTOR_INV_DST_ALPHA,
710    PIPE_BLENDFACTOR_INV_CONST_COLOR,
711    PIPE_BLENDFACTOR_INV_CONST_ALPHA,
712 #if 0
713    PIPE_BLENDFACTOR_INV_SRC1_COLOR,
714    PIPE_BLENDFACTOR_INV_SRC1_ALPHA,
715 #endif
716 };
717 
718 
719 const unsigned
720 blend_funcs[] = {
721    PIPE_BLEND_ADD,
722    PIPE_BLEND_SUBTRACT,
723    PIPE_BLEND_REVERSE_SUBTRACT,
724    PIPE_BLEND_MIN,
725    PIPE_BLEND_MAX
726 };
727 
728 
729 const struct lp_type blend_types[] = {
730    /* float, fixed,  sign,  norm, width, len */
731    {   TRUE, FALSE,  TRUE, FALSE,    32,   4 }, /* f32 x 4 */
732    {  FALSE, FALSE, FALSE,  TRUE,     8,  16 }, /* u8n x 16 */
733 };
734 
735 
736 const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]);
737 const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]);
738 const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]);
739 
740 
741 boolean
test_all(unsigned verbose,FILE * fp)742 test_all(unsigned verbose, FILE *fp)
743 {
744    const unsigned *rgb_func;
745    const unsigned *rgb_src_factor;
746    const unsigned *rgb_dst_factor;
747    const unsigned *alpha_func;
748    const unsigned *alpha_src_factor;
749    const unsigned *alpha_dst_factor;
750    struct pipe_blend_state blend;
751    enum vector_mode mode;
752    const struct lp_type *type;
753    boolean success = TRUE;
754 
755    for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) {
756       for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) {
757          for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) {
758             for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) {
759                for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) {
760                   for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) {
761                      for(mode = 0; mode < 2; ++mode) {
762                         for(type = blend_types; type < &blend_types[num_types]; ++type) {
763 
764                            if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
765                               *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
766                               continue;
767 
768                            memset(&blend, 0, sizeof blend);
769                            blend.rt[0].blend_enable      = 1;
770                            blend.rt[0].rgb_func          = *rgb_func;
771                            blend.rt[0].rgb_src_factor    = *rgb_src_factor;
772                            blend.rt[0].rgb_dst_factor    = *rgb_dst_factor;
773                            blend.rt[0].alpha_func        = *alpha_func;
774                            blend.rt[0].alpha_src_factor  = *alpha_src_factor;
775                            blend.rt[0].alpha_dst_factor  = *alpha_dst_factor;
776                            blend.rt[0].colormask         = PIPE_MASK_RGBA;
777 
778                            if(!test_one(verbose, fp, &blend, mode, *type))
779                              success = FALSE;
780 
781                         }
782                      }
783                   }
784                }
785             }
786          }
787       }
788    }
789 
790    return success;
791 }
792 
793 
794 boolean
test_some(unsigned verbose,FILE * fp,unsigned long n)795 test_some(unsigned verbose, FILE *fp,
796           unsigned long n)
797 {
798    const unsigned *rgb_func;
799    const unsigned *rgb_src_factor;
800    const unsigned *rgb_dst_factor;
801    const unsigned *alpha_func;
802    const unsigned *alpha_src_factor;
803    const unsigned *alpha_dst_factor;
804    struct pipe_blend_state blend;
805    enum vector_mode mode;
806    const struct lp_type *type;
807    unsigned long i;
808    boolean success = TRUE;
809 
810    for(i = 0; i < n; ++i) {
811       rgb_func = &blend_funcs[rand() % num_funcs];
812       alpha_func = &blend_funcs[rand() % num_funcs];
813       rgb_src_factor = &blend_factors[rand() % num_factors];
814       alpha_src_factor = &blend_factors[rand() % num_factors];
815 
816       do {
817          rgb_dst_factor = &blend_factors[rand() % num_factors];
818       } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
819 
820       do {
821          alpha_dst_factor = &blend_factors[rand() % num_factors];
822       } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
823 
824       mode = rand() & 1;
825 
826       type = &blend_types[rand() % num_types];
827 
828       memset(&blend, 0, sizeof blend);
829       blend.rt[0].blend_enable      = 1;
830       blend.rt[0].rgb_func          = *rgb_func;
831       blend.rt[0].rgb_src_factor    = *rgb_src_factor;
832       blend.rt[0].rgb_dst_factor    = *rgb_dst_factor;
833       blend.rt[0].alpha_func        = *alpha_func;
834       blend.rt[0].alpha_src_factor  = *alpha_src_factor;
835       blend.rt[0].alpha_dst_factor  = *alpha_dst_factor;
836       blend.rt[0].colormask         = PIPE_MASK_RGBA;
837 
838       if(!test_one(verbose, fp, &blend, mode, *type))
839         success = FALSE;
840    }
841 
842    return success;
843 }
844 
845 
846 boolean
test_single(unsigned verbose,FILE * fp)847 test_single(unsigned verbose, FILE *fp)
848 {
849    printf("no test_single()");
850    return TRUE;
851 }
852