1 /*
2 * Copyright 2019 Google LLC
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "include/core/SkColorPriv.h"
9 #include "include/private/SkColorData.h"
10 #include "src/core/SkCpu.h"
11 #include "src/core/SkMSAN.h"
12 #include "src/core/SkVM.h"
13 #include "tests/Test.h"
14
template <typename Fn>
// Runs `test` against the Program built from `b`.  If that program JIT'd
// successfully, rebuilds it with allow_jit=false and runs `test` again so
// both the JIT and interpreter back ends are exercised on the same builder.
static void test_jit_and_interpreter(const skvm::Builder& b, Fn&& test) {
    skvm::Program p = b.done();
    test(p);
    if (p.hasJIT()) {
        // Second pass: force the interpreter path.
        test(b.done(/*debug_name=*/nullptr, /*allow_jit=*/false));
    }
}
23
DEF_TEST(SkVM_eliminate_dead_code, r) {
    // Build a program whose final result is never stored anywhere, so all
    // four instructions are dead and DCE should remove every one of them.
    skvm::Builder b;
    {
        skvm::Ptr arg    = b.varying<int>();
        skvm::I32 loaded = b.load32(arg);
        skvm::I32 summed = b.add(loaded, loaded);
        b.add(summed, b.splat(7));   // result intentionally unused
    }

    std::vector<skvm::Instruction> insts = b.program();
    REPORTER_ASSERT(r, insts.size() == 4);

    insts = skvm::eliminate_dead_code(insts);
    REPORTER_ASSERT(r, insts.empty());
}
39
DEF_TEST(SkVM_Pointless, r) {
    // Let's build a program with no memory arguments.
    // It should all be pegged as dead code, but we should be able to "run" it.
    skvm::Builder b;
    {
        b.add(b.splat(5.0f), b.splat(4.0f));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        // Evaluate at every lane count from 0 through 63.
        for (int n = 0; n < 64; n++) {
            program.eval(n);
        }
    });

    // Every instruction should be dead (death == 0) and hoistable.
    for (const skvm::OptimizedInstruction& inst : b.optimize()) {
        REPORTER_ASSERT(r, inst.death == 0 && inst.can_hoist == true);
    }
}
59
DEF_TEST(SkVM_memset, r) {
    // A one-instruction memset32: write 42 to each lane of a varying.
    skvm::Builder b;
    b.store32(b.varying<int>(), b.splat(42));

    test_jit_and_interpreter(b, [&](const skvm::Program& p) {
        int scratch[18];
        scratch[17] = 47;   // sentinel just past the 17 lanes we evaluate

        p.eval(17, scratch);
        for (int i = 0; i < 17; ++i) {
            REPORTER_ASSERT(r, scratch[i] == 42);
        }
        // The sentinel must not have been overwritten.
        REPORTER_ASSERT(r, scratch[17] == 47);
    });
}
75
DEF_TEST(SkVM_memcpy, r) {
    // A one-instruction memcpy: dst[i] = src[i].
    skvm::Builder b;
    {
        auto src = b.varying<int>(),
             dst = b.varying<int>();
        b.store32(dst, b.load32(src));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& p) {
        int src[] = {1,2,3,4,5,6,7,8,9},
            dst[] = {0,0,0,0,0,0,0,0,0};

        // Copy all but the final element; it acts as a sentinel.
        const size_t n = SK_ARRAY_COUNT(src)-1;
        p.eval(n, src, dst);
        for (size_t i = 0; i < n; ++i) {
            REPORTER_ASSERT(r, dst[i] == src[i]);
        }
        REPORTER_ASSERT(r, dst[n] == 0);
    });
}
96
DEF_TEST(SkVM_allow_jit, r) {
    // Verify that done(..., allow_jit=false) never produces a JIT'd program,
    // even for a builder that JITs fine when allowed.
    skvm::Builder b;
    {
        auto src = b.varying<int>(),
             dst = b.varying<int>();
        b.store32(dst, b.load32(src));
    }

    // Only meaningful on platforms where this program can JIT at all.
    if (b.done("", /*allow_jit=*/true).hasJIT()) {
        REPORTER_ASSERT(r, !b.done("", false).hasJIT());
    }
}
109
DEF_TEST(SkVM_LoopCounts, r) {
    // Make sure we cover all the exact N we want.

    // buf[i] += 1
    skvm::Builder b;
    skvm::Ptr arg = b.varying<int>();
    b.store32(arg,
              b.add(b.splat(1),
                    b.load32(arg)));

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int buf[64];
        const int kLen = (int)SK_ARRAY_COUNT(buf);
        for (int N = 0; N <= kLen; N++) {
            // Reset: buf[i] = i.
            for (int i = 0; i < kLen; i++) {
                buf[i] = i;
            }
            program.eval(N, buf);

            // The first N lanes were incremented; the rest are untouched.
            for (int i = 0; i < kLen; i++) {
                const int want = (i < N) ? i+1 : i;
                REPORTER_ASSERT(r, buf[i] == want);
            }
        }
    });
}
137
DEF_TEST(SkVM_gather32, r) {
    // buf[i] = img[buf[i] & 7]: a 32-bit gather from a uniform pointer,
    // with the index masked so it wraps with period 8.
    skvm::Builder b;
    {
        skvm::Ptr uniforms = b.uniform(),
                  buf      = b.varying<int>();
        skvm::I32 x = b.load32(buf);
        b.store32(buf, b.gather32(uniforms,0, b.bit_and(x, b.splat(7))));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        const int img[] = {12,34,56,78, 90,98,76,54};

        int buf[20];
        for (int i = 0; i < 20; i++) {
            buf[i] = i;   // indices 0..19, wrapped to 0..7 by the mask
        }

        struct Uniforms {
            const int* img;
        } uniforms{img};

        program.eval(20, &uniforms, buf);
        // Expect img[] repeated with period 8, truncated to 20 entries.
        int i = 0;
        REPORTER_ASSERT(r, buf[i] == 12); i++;
        REPORTER_ASSERT(r, buf[i] == 34); i++;
        REPORTER_ASSERT(r, buf[i] == 56); i++;
        REPORTER_ASSERT(r, buf[i] == 78); i++;
        REPORTER_ASSERT(r, buf[i] == 90); i++;
        REPORTER_ASSERT(r, buf[i] == 98); i++;
        REPORTER_ASSERT(r, buf[i] == 76); i++;
        REPORTER_ASSERT(r, buf[i] == 54); i++;

        REPORTER_ASSERT(r, buf[i] == 12); i++;
        REPORTER_ASSERT(r, buf[i] == 34); i++;
        REPORTER_ASSERT(r, buf[i] == 56); i++;
        REPORTER_ASSERT(r, buf[i] == 78); i++;
        REPORTER_ASSERT(r, buf[i] == 90); i++;
        REPORTER_ASSERT(r, buf[i] == 98); i++;
        REPORTER_ASSERT(r, buf[i] == 76); i++;
        REPORTER_ASSERT(r, buf[i] == 54); i++;

        REPORTER_ASSERT(r, buf[i] == 12); i++;
        REPORTER_ASSERT(r, buf[i] == 34); i++;
        REPORTER_ASSERT(r, buf[i] == 56); i++;
        REPORTER_ASSERT(r, buf[i] == 78); i++;
    });
}
185
DEF_TEST(SkVM_gathers, r) {
    // Exercise gather32/gather16/gather8 side by side from one uniform
    // pointer, with index masks sized so each stays inside img's bytes.
    skvm::Builder b;
    {
        skvm::Ptr uniforms = b.uniform(),
                  buf32    = b.varying<int>(),
                  buf16    = b.varying<uint16_t>(),
                  buf8     = b.varying<uint8_t>();

        skvm::I32 x = b.load32(buf32);

        b.store32(buf32, b.gather32(uniforms,0, b.bit_and(x, b.splat( 7))));
        b.store16(buf16, b.gather16(uniforms,0, b.bit_and(x, b.splat(15))));
        b.store8 (buf8 , b.gather8 (uniforms,0, b.bit_and(x, b.splat(31))));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        // img is indexed as 8 ints, 16 uint16s, or 32 bytes depending on
        // gather width — presumably why the 16/8-bit expectations below
        // interleave zeros (the high halves/bytes of each little int).
        const int img[] = {12,34,56,78, 90,98,76,54};

        constexpr int N = 20;
        int      buf32[N];
        uint16_t buf16[N];
        uint8_t  buf8 [N];

        for (int i = 0; i < 20; i++) {
            buf32[i] = i;   // the gather indices
        }

        struct Uniforms {
            const int* img;
        } uniforms{img};

        program.eval(N, &uniforms, buf32, buf16, buf8);
        int i = 0;
        REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 12); i++;
        REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] ==  0 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] ==  0 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 56 && buf8[i] == 34); i++;
        REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] ==  0 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 78 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] ==  0 && buf8[i] ==  0); i++;

        REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 90 && buf8[i] == 56); i++;
        REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] ==  0 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 98 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] ==  0 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 76 && buf8[i] == 78); i++;
        REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] ==  0 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 54 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] ==  0 && buf8[i] ==  0); i++;

        REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 90); i++;
        REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] ==  0 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] ==  0 && buf8[i] ==  0); i++;
    });
}
243
DEF_TEST(SkVM_gathers2, r) {
    // Like SkVM_gathers but with unmasked indices into a byte ramp,
    // so each gather width reads a different slice of the same data.
    skvm::Builder b;
    {
        skvm::Ptr uniforms = b.uniform(),
                  buf32    = b.varying<int>(),
                  buf16    = b.varying<uint16_t>(),
                  buf8     = b.varying<uint8_t>();

        skvm::I32 x = b.load32(buf32);

        b.store32(buf32, b.gather32(uniforms,0, x));
        b.store16(buf16, b.gather16(uniforms,0, x));
        b.store8 (buf8 , b.gather8 (uniforms,0, x));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        // img[i] == i, so a byte gather returns its own index.
        uint8_t img[256];
        for (int i = 0; i < 256; i++) {
            img[i] = i;
        }

        int      buf32[64];
        uint16_t buf16[64];
        uint8_t  buf8 [64];

        for (int i = 0; i < 64; i++) {
            buf32[i] = (i*47)&63;   // a scrambled permutation of 0..63
            buf16[i] = 0;
            buf8 [i] = 0;
        }

        struct Uniforms {
            const uint8_t* img;
        } uniforms{img};

        program.eval(64, &uniforms, buf32, buf16, buf8);

        // gather8 of the identity ramp returns the index itself.
        for (int i = 0; i < 64; i++) {
            REPORTER_ASSERT(r, buf8[i] == ((i*47)&63)); // 0,47,30,13,60,...
        }

        // Spot-check the wider gathers (little-endian byte packing).
        REPORTER_ASSERT(r, buf16[ 0] == 0x0100);
        REPORTER_ASSERT(r, buf16[63] == 0x2322);

        REPORTER_ASSERT(r, buf32[ 0] == 0x03020100);
        REPORTER_ASSERT(r, buf32[63] == 0x47464544);
    });
}
292
DEF_TEST(SkVM_bitops, r) {
    // Chain every bitwise op and shift through a single value; the inline
    // comments track the running value starting from the input 0x42.
    skvm::Builder b;
    {
        skvm::Ptr ptr = b.varying<int>();

        skvm::I32 x = b.load32(ptr);

        x = b.bit_and  (x, b.splat(0xf1));  // 0x40
        x = b.bit_or   (x, b.splat(0x80));  // 0xc0
        x = b.bit_xor  (x, b.splat(0xfe));  // 0x3e
        x = b.bit_clear(x, b.splat(0x30));  // 0x0e

        x = b.shl(x, 28);  // 0xe000'0000
        x = b.sra(x, 28);  // 0xffff'fffe  (arithmetic shift sign-extends)
        x = b.shr(x,  1);  // 0x7fff'ffff  (logical shift shifts in zero)

        b.store32(ptr, x);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int x = 0x42;
        program.eval(1, &x);
        REPORTER_ASSERT(r, x == 0x7fff'ffff);
    });
}
318
DEF_TEST(SkVM_select_is_NaN, r) {
    // select(is_NaN(x), 0, x) should be peephole-optimized down to
    // load / neq_f32 / bit_clear / store — verified against the
    // optimized instruction stream below.
    skvm::Builder b;
    {
        skvm::Ptr src = b.varying<float>(),
                  dst = b.varying<float>();

        skvm::F32 x = b.loadF(src);
        x = select(is_NaN(x), b.splat(0.0f)
                            , x);
        b.storeF(dst, x);
    }

    std::vector<skvm::OptimizedInstruction> program = b.optimize();
    REPORTER_ASSERT(r, program.size() == 4);
    REPORTER_ASSERT(r, program[0].op == skvm::Op::load32);
    REPORTER_ASSERT(r, program[1].op == skvm::Op::neq_f32);   // x != x is true only for NaN
    REPORTER_ASSERT(r, program[2].op == skvm::Op::bit_clear);
    REPORTER_ASSERT(r, program[3].op == skvm::Op::store32);

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        // ±NaN, ±0, ±1, ±inf
        uint32_t src[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
                          0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
        uint32_t dst[SK_ARRAY_COUNT(src)];
        program.eval(SK_ARRAY_COUNT(src), src, dst);

        // Only the two NaN inputs (i == 0, 1) map to zero.
        for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
            REPORTER_ASSERT(r, dst[i] == (i < 2 ? 0 : src[i]));
        }
    });
}
350
DEF_TEST(SkVM_f32, r) {
    // Basic float arithmetic: whatever x is, (2x - x) / x == 1.
    skvm::Builder b;
    {
        skvm::Ptr arg = b.varying<float>();

        skvm::F32 x = b.loadF(arg),
                  y = b.add(x,x),   // y = 2x
                  z = b.sub(y,x),   // z = 2x-x = x
                  w = b.div(z,x);   // w = x/x = 1
        b.storeF(arg, w);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        float vals[] = { 1,2,3,4,5,6,7,8,9 };
        program.eval(SK_ARRAY_COUNT(vals), vals);
        for (size_t i = 0; i < SK_ARRAY_COUNT(vals); ++i) {
            REPORTER_ASSERT(r, vals[i] == 1.0f);
        }
    });
}
371
DEF_TEST(SkVM_cmp_i32, r) {
    // Pack the result of each integer comparison into one bit of a mask:
    // bit 0: x==0, bit 1: x!=1, bit 2: x<2, bit 3: x<=3, bit 4: x>4, bit 5: x>=5.
    skvm::Builder b;
    {
        skvm::I32 x = b.load32(b.varying<int>());

        // Comparison results are all-ones/all-zeros lanes; keep the low
        // bit and shift it into position.
        auto to_bit = [&](int shift, skvm::I32 mask) {
            return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
        };

        skvm::I32 m = b.splat(0);
        m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0))));
        m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1))));
        m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2))));
        m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3))));
        m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4))));
        m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5))));

        b.store32(b.varying<int>(), m);
    }
    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int in[] = { 0,1,2,3,4,5,6,7,8,9 };
        int out[SK_ARRAY_COUNT(in)];

        program.eval(SK_ARRAY_COUNT(in), in, out);

        REPORTER_ASSERT(r, out[0] == 0b001111);
        REPORTER_ASSERT(r, out[1] == 0b001100);
        REPORTER_ASSERT(r, out[2] == 0b001010);
        REPORTER_ASSERT(r, out[3] == 0b001010);
        REPORTER_ASSERT(r, out[4] == 0b000010);
        // From 5 on, x!=1, x>4, and x>=5 all hold.
        for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
            REPORTER_ASSERT(r, out[i] == 0b110010);
        }
    });
}
407
DEF_TEST(SkVM_cmp_f32, r) {
    // Float twin of SkVM_cmp_i32: same bit-per-comparison mask, same
    // expected values since the inputs are whole numbers.
    skvm::Builder b;
    {
        skvm::F32 x = b.loadF(b.varying<float>());

        auto to_bit = [&](int shift, skvm::I32 mask) {
            return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
        };

        skvm::I32 m = b.splat(0);
        m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0.0f))));
        m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1.0f))));
        m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2.0f))));
        m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3.0f))));
        m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4.0f))));
        m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5.0f))));

        b.store32(b.varying<int>(), m);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        float in[] = { 0,1,2,3,4,5,6,7,8,9 };
        int out[SK_ARRAY_COUNT(in)];

        program.eval(SK_ARRAY_COUNT(in), in, out);

        REPORTER_ASSERT(r, out[0] == 0b001111);
        REPORTER_ASSERT(r, out[1] == 0b001100);
        REPORTER_ASSERT(r, out[2] == 0b001010);
        REPORTER_ASSERT(r, out[3] == 0b001010);
        REPORTER_ASSERT(r, out[4] == 0b000010);
        for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
            REPORTER_ASSERT(r, out[i] == 0b110010);
        }
    });
}
444
DEF_TEST(SkVM_index, r) {
    // index() produces the remaining loop count, so lane i of an N-lane
    // eval sees N-i (as the assertion below demonstrates).
    skvm::Builder b;
    b.store32(b.varying<int>(), b.index());

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int buf[23];
        program.eval(SK_ARRAY_COUNT(buf), buf);
        for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
            REPORTER_ASSERT(r, buf[i] == (int)SK_ARRAY_COUNT(buf)-i);
        }
    });
}
457
DEF_TEST(SkVM_mad, r) {
    // This program is designed to exercise the tricky corners of instruction
    // and register selection for Op::mad_f32.

    skvm::Builder b;
    {
        skvm::Ptr arg = b.varying<int>();

        skvm::F32 x = b.to_F32(b.load32(arg)),
                  y = b.mad(x,x,x),   // x is needed in the future, so r[x] != r[y].
                  z = b.mad(y,y,x),   // y is needed in the future, but r[z] = r[x] is ok.
                  w = b.mad(z,z,y),   // w can alias z but not y.
                  v = b.mad(w,y,w);   // Got to stop somewhere.
        b.store32(arg, b.trunc(v));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int x = 2;
        program.eval(1, &x);
        // x = 2
        // y = 2*2 + 2 = 6
        // z = 6*6 + 2 = 38
        // w = 38*38 + 6 = 1450
        // v = 1450*6 + 1450 = 10150
        REPORTER_ASSERT(r, x == 10150);
    });
}
485
DEF_TEST(SkVM_fms, r) {
    // Create a pattern that can be peepholed into an Op::fms_f32:
    // v = x*2 - 1, computed in float and truncated back to int.
    skvm::Builder b;
    {
        skvm::Ptr arg = b.varying<int>();

        skvm::F32 x = b.to_F32(b.load32(arg)),
                  v = b.sub(b.mul(x, b.splat(2.0f)),
                            b.splat(1.0f));
        b.store32(arg, b.trunc(v));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
        program.eval((int)SK_ARRAY_COUNT(buf), &buf);

        for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
            // Bug fix: this was `buf[i] = 2*i-1` (assignment), which always
            // evaluated truthy and asserted nothing.  Compare instead.
            REPORTER_ASSERT(r, buf[i] == 2*i-1);
        }
    });
}
507
DEF_TEST(SkVM_fnma, r) {
    // Create a pattern that can be peepholed into an Op::fnma_f32:
    // v = 1 - x*2, computed in float and truncated back to int.
    skvm::Builder b;
    {
        skvm::Ptr arg = b.varying<int>();

        skvm::F32 x = b.to_F32(b.load32(arg)),
                  v = b.sub(b.splat(1.0f),
                            b.mul(x, b.splat(2.0f)));
        b.store32(arg, b.trunc(v));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
        program.eval((int)SK_ARRAY_COUNT(buf), &buf);

        for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
            // Bug fix: this was `buf[i] = 1-2*i` (assignment), which always
            // evaluated truthy and asserted nothing.  Compare instead.
            REPORTER_ASSERT(r, buf[i] == 1-2*i);
        }
    });
}
529
DEF_TEST(SkVM_madder, r) {
    // Another mad_f32 register-pressure pattern, this time in pure float.
    skvm::Builder b;
    {
        skvm::Ptr arg = b.varying<float>();

        skvm::F32 x = b.loadF(arg),
                  y = b.mad(x,x,x),   // x is needed in the future, so r[x] != r[y].
                  z = b.mad(y,x,y),   // r[x] can be reused after this instruction, but not r[y].
                  w = b.mad(y,y,z);
        b.storeF(arg, w);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        float x = 2.0f;
        // y = 2*2 + 2 = 6
        // z = 6*2 + 6 = 18
        // w = 6*6 + 18 = 54
        program.eval(1, &x);
        REPORTER_ASSERT(r, x == 54.0f);
    });
}
551
DEF_TEST(SkVM_floor, r) {
    // buf[i] = floor(buf[i]), checked against hand-computed values
    // including the negative fractional cases.
    skvm::Builder b;
    {
        skvm::Ptr arg = b.varying<float>();
        b.storeF(arg, b.floor(b.loadF(arg)));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        float       vals[] = { -2.0f, -1.5f, -1.0f, 0.0f, 1.0f, 1.5f, 2.0f };
        const float want[] = { -2.0f, -2.0f, -1.0f, 0.0f, 1.0f, 1.0f, 2.0f };
        program.eval(SK_ARRAY_COUNT(vals), vals);
        for (size_t i = 0; i < SK_ARRAY_COUNT(vals); ++i) {
            REPORTER_ASSERT(r, vals[i] == want[i]);
        }
    });
}
568
DEF_TEST(SkVM_round, r) {
    // dst[i] = round(src[i]), rounding float to nearest int.
    skvm::Builder b;
    {
        skvm::Ptr src = b.varying<float>();
        skvm::Ptr dst = b.varying<int>();
        b.store32(dst, b.round(b.loadF(src)));
    }

    // The test cases on exact 0.5f boundaries assume the current rounding mode is nearest even.
    // We haven't explicitly guaranteed that here... it just probably is.
    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        float buf[]  = { -1.5f, -0.5f, 0.0f, 0.5f, 0.2f, 0.6f, 1.0f, 1.4f, 1.5f, 2.0f };
        int want[] =   { -2   , 0    , 0   , 0   , 0   , 1   , 1   , 1   , 2   , 2    };
        int dst[SK_ARRAY_COUNT(buf)];

        program.eval(SK_ARRAY_COUNT(buf), buf, dst);
        for (int i = 0; i < (int)SK_ARRAY_COUNT(dst); i++) {
            REPORTER_ASSERT(r, dst[i] == want[i]);
        }
    });
}
590
DEF_TEST(SkVM_min, r) {
    // dst[i] = min(src1[i], src2[i])
    skvm::Builder b;
    {
        skvm::Ptr src1 = b.varying<float>(),
                  src2 = b.varying<float>(),
                  dst  = b.varying<float>();

        b.storeF(dst, b.min(b.loadF(src1), b.loadF(src2)));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        float lhs[]  = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f},
              rhs[]  = { 0.0f, 2.0f, 3.0f,  1.0f, -2.0f},
              want[] = { 0.0f, 1.0f, 3.0f, -1.0f, -2.0f},
              got[SK_ARRAY_COUNT(lhs)];
        program.eval(SK_ARRAY_COUNT(got), lhs, rhs, got);
        for (size_t i = 0; i < SK_ARRAY_COUNT(got); ++i) {
            REPORTER_ASSERT(r, got[i] == want[i]);
        }
    });
}
612
DEF_TEST(SkVM_max, r) {
    // dst[i] = max(src1[i], src2[i])
    skvm::Builder b;
    {
        skvm::Ptr src1 = b.varying<float>(),
                  src2 = b.varying<float>(),
                  dst  = b.varying<float>();

        b.storeF(dst, b.max(b.loadF(src1), b.loadF(src2)));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        float lhs[]  = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f},
              rhs[]  = { 0.0f, 2.0f, 3.0f,  1.0f, -2.0f},
              want[] = { 0.0f, 2.0f, 4.0f,  1.0f, -1.0f},
              got[SK_ARRAY_COUNT(lhs)];
        program.eval(SK_ARRAY_COUNT(got), lhs, rhs, got);
        for (size_t i = 0; i < SK_ARRAY_COUNT(got); ++i) {
            REPORTER_ASSERT(r, got[i] == want[i]);
        }
    });
}
634
DEF_TEST(SkVM_hoist, r) {
    // This program uses enough constants that it will fail to JIT if we hoist them.
    // The JIT will try again without hoisting, and that'll just need 2 registers.
    skvm::Builder b;
    {
        skvm::Ptr arg = b.varying<int>();
        skvm::I32 x = b.load32(arg);
        // 32 distinct splat constants to exhaust hoist registers.
        for (int i = 0; i < 32; i++) {
            x = b.add(x, b.splat(i));
        }
        b.store32(arg, x);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int x = 4;
        program.eval(1, &x);
        // x += 0 + 1 + 2 + 3 + ... + 30 + 31
        // x += 496
        REPORTER_ASSERT(r, x == 500);
    });
}
656
DEF_TEST(SkVM_select, r) {
    // x = (x > 4) ? x : 42
    skvm::Builder b;
    {
        skvm::Ptr buf = b.varying<int>();

        skvm::I32 x = b.load32(buf);
        x = b.select(b.gt(x, b.splat(4)), x, b.splat(42));
        b.store32(buf, x);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int vals[] = { 0,1,2,3,4,5,6,7,8 };
        program.eval(SK_ARRAY_COUNT(vals), vals);
        for (int i = 0; i < (int)SK_ARRAY_COUNT(vals); i++) {
            const int want = i > 4 ? i : 42;
            REPORTER_ASSERT(r, vals[i] == want);
        }
    });
}
677
DEF_TEST(SkVM_swap, r) {
    skvm::Builder b;
    {
        // This program is the equivalent of
        //     x = *X
        //     y = *Y
        //     *X = y
        //     *Y = x
        // One rescheduling of the program based only on data flow of Op arguments is
        //     x = *X
        //     *Y = x
        //     y = *Y
        //     *X = y
        // but this reordering does not produce the same results and is invalid.
        skvm::Ptr X = b.varying<int>(),
                  Y = b.varying<int>();

        skvm::I32 x = b.load32(X),
                  y = b.load32(Y);

        b.store32(X, y);
        b.store32(Y, x);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int b1[] = { 0,1,2,3 };
        int b2[] = { 4,5,6,7 };
        program.eval(SK_ARRAY_COUNT(b1), b1, b2);
        // After a correct swap, b1 holds b2's old values and vice versa.
        for (int i = 0; i < (int)SK_ARRAY_COUNT(b1); i++) {
            REPORTER_ASSERT(r, b1[i] == 4 + i);
            REPORTER_ASSERT(r, b2[i] == i);
        }
    });
}
712
DEF_TEST(SkVM_NewOps, r) {
    // Exercise a somewhat arbitrary set of new ops: load16/store16,
    // uniform32 at offsets, select-based clamping, and gather8.
    skvm::Builder b;
    {
        skvm::Ptr buf      = b.varying<int16_t>(),
                  uniforms = b.uniform();

        skvm::I32 x = b.load16(buf);

        // Uniform fields start after the leading img pointer.
        const size_t kPtr = sizeof(const int*);

        x = b.add(x, b.uniform32(uniforms, kPtr+0));
        x = b.mul(x, b.uniform32(uniforms, kPtr+4));
        x = b.sub(x, b.uniform32(uniforms, kPtr+8));

        // Clamp x to [0, limit].
        skvm::I32 limit = b.uniform32(uniforms, kPtr+12);
        x = b.select(b.lt(x, b.splat(0)), b.splat(0), x);
        x = b.select(b.gt(x, limit     ), limit     , x);

        x = b.gather8(uniforms,0, x);

        b.store16(buf, x);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        const int N = 31;
        int16_t buf[N];
        for (int i = 0; i < N; i++) {
            buf[i] = i;
        }

        const int M = 16;
        uint8_t img[M];
        for (int i = 0; i < M; i++) {
            img[i] = i*i;
        }

        // Layout must match the kPtr offsets used above.
        struct {
            const uint8_t* img;
            int add   =   5;
            int mul   =   3;
            int sub   =  18;
            int limit = M-1;
        } uniforms{img};

        program.eval(N, buf, &uniforms);

        for (int i = 0; i < N; i++) {
            // Our first math calculates x = (i+5)*3 - 18 a.k.a 3*(i-1).
            int x = 3*(i-1);

            // Then that's pinned to the limits of img.
            if (i < 2) { x =  0; }  // Notice i == 1 hits x == 0 exactly...
            if (i > 5) { x = 15; }  // ...and i == 6 hits x == 15 exactly
            REPORTER_ASSERT(r, buf[i] == img[x]);
        }
    });
}
771
DEF_TEST(SkVM_sqrt, r) {
    // buf[i] = sqrt(buf[i]) on perfect squares, so results are exact.
    skvm::Builder b;
    // NOTE(review): declared varying<int> but loaded/stored as float below;
    // the stride matches (4 bytes) — presumably intentional, verify.
    auto buf = b.varying<int>();
    b.storeF(buf, b.sqrt(b.loadF(buf)));

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        constexpr int K = 17;
        float buf[K];
        for (int i = 0; i < K; i++) {
            buf[i] = (float)(i*i);
        }

        // x^2 -> x
        program.eval(K, buf);

        for (int i = 0; i < K; i++) {
            REPORTER_ASSERT(r, buf[i] == (float)i);
        }
    });
}
792
DEF_TEST(SkVM_MSAN, r) {
    // This little memset32() program should be able to JIT, but if we run that
    // JIT code in an MSAN build, it won't see the writes initialize buf.  So
    // this tests that we're using the interpreter instead.
    skvm::Builder b;
    b.store32(b.varying<int>(), b.splat(42));

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        constexpr int K = 17;
        int buf[K];  // Intentionally uninitialized.
        program.eval(K, buf);
        // Under MSAN this asserts that eval's writes were visible to the tool.
        sk_msan_assert_initialized(buf, buf+K);
        for (int x : buf) {
            REPORTER_ASSERT(r, x == 42);
        }
    });
}
810
DEF_TEST(SkVM_assert, r) {
    // assert_true must hold for every lane; all inputs below are < 42,
    // so evaluating simply must not trip the assert.
    skvm::Builder b;
    b.assert_true(b.lt(b.load32(b.varying<int>()),
                       b.splat(42)));

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int buf[] = { 0,1,2,3,4,5,6,7,8,9 };
        program.eval(SK_ARRAY_COUNT(buf), buf);
    });
}
821
DEF_TEST(SkVM_premul, reporter) {
    // Test that premul is short-circuited when alpha is known opaque.
    {
        // Alpha comes from memory: premul must actually multiply.
        skvm::Builder p;
        auto rptr = p.varying<int>(),
             aptr = p.varying<int>();

        skvm::F32 r = p.loadF(rptr),
                  g = p.splat(0.0f),
                  b = p.splat(0.0f),
                  a = p.loadF(aptr);

        p.premul(&r, &g, &b, a);
        p.storeF(rptr, r);

        // load red, load alpha, red *= alpha, store red
        REPORTER_ASSERT(reporter, p.done().instructions().size() == 4);
    }

    {
        // Alpha is a known constant 1.0: the multiply should vanish.
        skvm::Builder p;
        auto rptr = p.varying<int>();

        skvm::F32 r = p.loadF(rptr),
                  g = p.splat(0.0f),
                  b = p.splat(0.0f),
                  a = p.splat(1.0f);

        p.premul(&r, &g, &b, a);
        p.storeF(rptr, r);

        // load red, store red
        REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
    }

    // Same deal for unpremul.
    {
        // Alpha from memory: unpremul emits its full instruction sequence.
        skvm::Builder p;
        auto rptr = p.varying<int>(),
             aptr = p.varying<int>();

        skvm::F32 r = p.loadF(rptr),
                  g = p.splat(0.0f),
                  b = p.splat(0.0f),
                  a = p.loadF(aptr);

        p.unpremul(&r, &g, &b, a);
        p.storeF(rptr, r);

        // load red, load alpha, a bunch of unpremul instructions, store red
        REPORTER_ASSERT(reporter, p.done().instructions().size() >= 4);
    }

    {
        // Alpha is a known constant 1.0: unpremul is a no-op too.
        skvm::Builder p;
        auto rptr = p.varying<int>();

        skvm::F32 r = p.loadF(rptr),
                  g = p.splat(0.0f),
                  b = p.splat(0.0f),
                  a = p.splat(1.0f);

        p.unpremul(&r, &g, &b, a);
        p.storeF(rptr, r);

        // load red, store red
        REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
    }
}
891
template <typename Fn>
// Assembles the instructions emitted by `fn` into a local buffer and
// compares the produced machine code byte-for-byte against `expected`.
static void test_asm(skiatest::Reporter* r, Fn&& fn, std::initializer_list<uint8_t> expected) {
    uint8_t buf[4096];
    skvm::Assembler a{buf};
    fn(a);

    REPORTER_ASSERT(r, a.size() == expected.size());

    // Compare only the overlapping prefix so a size mismatch above doesn't
    // cause out-of-bounds reads here.
    auto got = (const uint8_t*)buf,
        want = expected.begin();
    for (int i = 0; i < (int)std::min(a.size(), expected.size()); i++) {
        REPORTER_ASSERT(r, got[i] == want[i],
                        "byte %d was %02x, want %02x", i, got[i], want[i]);
    }
}
907
DEF_TEST(SkVM_Assembler,r)908 DEF_TEST(SkVM_Assembler, r) {
909 // Easiest way to generate test cases is
910 //
911 // echo '...some asm...' | llvm-mc -show-encoding -x86-asm-syntax=intel
912 //
913 // The -x86-asm-syntax=intel bit is optional, controlling the
914 // input syntax only; the output will always be AT&T op x,y,dst style.
915 // Our APIs read more like Intel op dst,x,y as op(dst,x,y), so I find
916 // that a bit easier to use here, despite maybe favoring AT&T overall.
917
918 using A = skvm::Assembler;
919 // Our exit strategy from AVX code.
920 test_asm(r, [&](A& a) {
921 a.int3();
922 a.vzeroupper();
923 a.ret();
924 },{
925 0xcc,
926 0xc5, 0xf8, 0x77,
927 0xc3,
928 });
929
930 // Align should pad with zero
931 test_asm(r, [&](A& a) {
932 a.ret();
933 a.align(4);
934 },{
935 0xc3,
936 0x00, 0x00, 0x00,
937 });
938
939 test_asm(r, [&](A& a) {
940 a.add(A::rax, 8); // Always good to test rax.
941 a.sub(A::rax, 32);
942
943 a.add(A::rdi, 12); // Last 0x48 REX
944 a.sub(A::rdi, 8);
945
946 a.add(A::r8 , 7); // First 0x49 REX
947 a.sub(A::r8 , 4);
948
949 a.add(A::rsi, 128); // Requires 4 byte immediate.
950 a.sub(A::r8 , 1000000);
951
952 a.add(A::Mem{A::rsi}, 7); // addq $7, (%rsi)
953 a.add(A::Mem{A::rsi, 12}, 7); // addq $7, 12(%rsi)
954 a.add(A::Mem{A::rsp, 12}, 7); // addq $7, 12(%rsp)
955 a.add(A::Mem{A::r12, 12}, 7); // addq $7, 12(%r12)
956 a.add(A::Mem{A::rsp, 12, A::rax, A::FOUR}, 7); // addq $7, 12(%rsp,%rax,4)
957 a.add(A::Mem{A::r12, 12, A::rax, A::FOUR}, 7); // addq $7, 12(%r12,%rax,4)
958 a.add(A::Mem{A::rax, 12, A::r12, A::FOUR}, 7); // addq $7, 12(%rax,%r12,4)
959 a.add(A::Mem{A::r11, 12, A::r8 , A::TWO }, 7); // addq $7, 12(%r11,%r8,2)
960 a.add(A::Mem{A::r11, 12, A::rax} , 7); // addq $7, 12(%r11,%rax)
961 a.add(A::Mem{A::rax, 12, A::r11} , 7); // addq $7, 12(%rax,%r11)
962
963 a.sub(A::Mem{A::rax, 12, A::r11} , 7); // subq $7, 12(%rax,%r11)
964
965 a.add( A::rax , A::rcx); // addq %rcx, %rax
966 a.add(A::Mem{A::rax} , A::rcx); // addq %rcx, (%rax)
967 a.add(A::Mem{A::rax, 12}, A::rcx); // addq %rcx, 12(%rax)
968 a.add(A::rcx, A::Mem{A::rax, 12}); // addq 12(%rax), %rcx
969
970 a.sub(A::rcx, A::Mem{A::rax, 12}); // subq 12(%rax), %rcx
971 },{
972 0x48, 0x83, 0b11'000'000, 0x08,
973 0x48, 0x83, 0b11'101'000, 0x20,
974
975 0x48, 0x83, 0b11'000'111, 0x0c,
976 0x48, 0x83, 0b11'101'111, 0x08,
977
978 0x49, 0x83, 0b11'000'000, 0x07,
979 0x49, 0x83, 0b11'101'000, 0x04,
980
981 0x48, 0x81, 0b11'000'110, 0x80, 0x00, 0x00, 0x00,
982 0x49, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00,
983
984 0x48,0x83,0x06,0x07,
985 0x48,0x83,0x46,0x0c,0x07,
986 0x48,0x83,0x44,0x24,0x0c,0x07,
987 0x49,0x83,0x44,0x24,0x0c,0x07,
988 0x48,0x83,0x44,0x84,0x0c,0x07,
989 0x49,0x83,0x44,0x84,0x0c,0x07,
990 0x4a,0x83,0x44,0xa0,0x0c,0x07,
991 0x4b,0x83,0x44,0x43,0x0c,0x07,
992 0x49,0x83,0x44,0x03,0x0c,0x07,
993 0x4a,0x83,0x44,0x18,0x0c,0x07,
994
995 0x4a,0x83,0x6c,0x18,0x0c,0x07,
996
997 0x48,0x01,0xc8,
998 0x48,0x01,0x08,
999 0x48,0x01,0x48,0x0c,
1000 0x48,0x03,0x48,0x0c,
1001 0x48,0x2b,0x48,0x0c,
1002 });
1003
1004
1005 test_asm(r, [&](A& a) {
1006 a.vpaddd (A::ymm0, A::ymm1, A::ymm2); // Low registers and 0x0f map -> 2-byte VEX.
1007 a.vpaddd (A::ymm8, A::ymm1, A::ymm2); // A high dst register is ok -> 2-byte VEX.
1008 a.vpaddd (A::ymm0, A::ymm8, A::ymm2); // A high first argument register -> 2-byte VEX.
1009 a.vpaddd (A::ymm0, A::ymm1, A::ymm8); // A high second argument -> 3-byte VEX.
1010 a.vpmulld(A::ymm0, A::ymm1, A::ymm2); // Using non-0x0f map instruction -> 3-byte VEX.
1011 a.vpsubd (A::ymm0, A::ymm1, A::ymm2); // Test vpsubd to ensure argument order is right.
1012 },{
1013 /* VEX */ /*op*/ /*modRM*/
1014 0xc5, 0xf5, 0xfe, 0xc2,
1015 0xc5, 0x75, 0xfe, 0xc2,
1016 0xc5, 0xbd, 0xfe, 0xc2,
1017 0xc4, 0xc1, 0x75, 0xfe, 0xc0,
1018 0xc4, 0xe2, 0x75, 0x40, 0xc2,
1019 0xc5, 0xf5, 0xfa, 0xc2,
1020 });
1021
1022 test_asm(r, [&](A& a) {
1023 a.vpaddw (A::ymm4, A::ymm3, A::ymm2);
1024 a.vpavgw (A::ymm4, A::ymm3, A::ymm2);
1025 a.vpcmpeqw (A::ymm4, A::ymm3, A::ymm2);
1026 a.vpcmpgtw (A::ymm4, A::ymm3, A::ymm2);
1027
1028 a.vpminsw (A::ymm4, A::ymm3, A::ymm2);
1029 a.vpmaxsw (A::ymm4, A::ymm3, A::ymm2);
1030 a.vpminuw (A::ymm4, A::ymm3, A::ymm2);
1031 a.vpmaxuw (A::ymm4, A::ymm3, A::ymm2);
1032
1033 a.vpmulhrsw(A::ymm4, A::ymm3, A::ymm2);
1034 a.vpabsw (A::ymm4, A::ymm3);
1035 a.vpsllw (A::ymm4, A::ymm3, 12);
1036 a.vpsraw (A::ymm4, A::ymm3, 12);
1037 },{
1038 0xc5, 0xe5, 0xfd, 0xe2,
1039 0xc5, 0xe5, 0xe3, 0xe2,
1040 0xc5, 0xe5, 0x75, 0xe2,
1041 0xc5, 0xe5, 0x65, 0xe2,
1042
1043 0xc5, 0xe5, 0xea, 0xe2,
1044 0xc5, 0xe5, 0xee, 0xe2,
1045 0xc4,0xe2,0x65, 0x3a, 0xe2,
1046 0xc4,0xe2,0x65, 0x3e, 0xe2,
1047
1048 0xc4,0xe2,0x65, 0x0b, 0xe2,
1049 0xc4,0xe2,0x7d, 0x1d, 0xe3,
1050 0xc5,0xdd,0x71, 0xf3, 0x0c,
1051 0xc5,0xdd,0x71, 0xe3, 0x0c,
1052 });
1053
1054 test_asm(r, [&](A& a) {
1055 A::Label l;
1056 a.vcmpeqps (A::ymm0, A::ymm1, &l); // vcmpeqps 0x1c(%rip), %ymm1, %ymm0
1057 a.vpcmpeqd (A::ymm0, A::ymm1, A::ymm2);
1058 a.vpcmpgtd (A::ymm0, A::ymm1, A::ymm2);
1059 a.vcmpeqps (A::ymm0, A::ymm1, A::ymm2);
1060 a.vcmpltps (A::ymm0, A::ymm1, A::ymm2);
1061 a.vcmpleps (A::ymm0, A::ymm1, A::ymm2);
1062 a.vcmpneqps(A::ymm0, A::ymm1, A::ymm2);
1063 a.label(&l); // 28 bytes after the vcmpeqps that uses it.
1064 },{
1065 0xc5,0xf4,0xc2,0x05,0x1c,0x00,0x00,0x00,0x00,
1066 0xc5,0xf5,0x76,0xc2,
1067 0xc5,0xf5,0x66,0xc2,
1068 0xc5,0xf4,0xc2,0xc2,0x00,
1069 0xc5,0xf4,0xc2,0xc2,0x01,
1070 0xc5,0xf4,0xc2,0xc2,0x02,
1071 0xc5,0xf4,0xc2,0xc2,0x04,
1072 });
1073
1074 test_asm(r, [&](A& a) {
1075 a.vminps(A::ymm0, A::ymm1, A::ymm2);
1076 a.vmaxps(A::ymm0, A::ymm1, A::ymm2);
1077 },{
1078 0xc5,0xf4,0x5d,0xc2,
1079 0xc5,0xf4,0x5f,0xc2,
1080 });
1081
1082 test_asm(r, [&](A& a) {
1083 a.vpblendvb(A::ymm0, A::ymm1, A::ymm2, A::ymm3);
1084 },{
1085 0xc4,0xe3,0x75, 0x4c, 0xc2, 0x30,
1086 });
1087
1088 test_asm(r, [&](A& a) {
1089 a.vpsrld(A::ymm15, A::ymm2, 8);
1090 a.vpsrld(A::ymm0 , A::ymm8, 5);
1091 },{
1092 0xc5, 0x85, 0x72,0xd2, 0x08,
1093 0xc4,0xc1,0x7d, 0x72,0xd0, 0x05,
1094 });
1095
1096 test_asm(r, [&](A& a) {
1097 A::Label l;
1098 a.vpermps(A::ymm1, A::ymm2, A::Mem{A::rdi, 32});
1099 a.vperm2f128(A::ymm1, A::ymm2, &l, 0x20);
1100 a.vpermq(A::ymm1, A::ymm2, 5);
1101 a.label(&l); // 6 bytes after vperm2f128
1102 },{
1103 0xc4,0xe2,0x6d,0x16,0x4f,0x20,
1104 0xc4,0xe3,0x6d,0x06,0x0d,0x06,0x00,0x00,0x00,0x20,
1105 0xc4,0xe3,0xfd, 0x00,0xca, 0x05,
1106 });
1107
1108 test_asm(r, [&](A& a) {
1109 a.vpunpckldq(A::ymm1, A::ymm2, A::Mem{A::rdi});
1110 a.vpunpckhdq(A::ymm1, A::ymm2, A::ymm3);
1111 },{
1112 0xc5,0xed,0x62,0x0f,
1113 0xc5,0xed,0x6a,0xcb,
1114 });
1115
1116 test_asm(r, [&](A& a) {
1117 a.vroundps(A::ymm1, A::ymm2, A::NEAREST);
1118 a.vroundps(A::ymm1, A::ymm2, A::FLOOR);
1119 a.vroundps(A::ymm1, A::ymm2, A::CEIL);
1120 a.vroundps(A::ymm1, A::ymm2, A::TRUNC);
1121 },{
1122 0xc4,0xe3,0x7d,0x08,0xca,0x00,
1123 0xc4,0xe3,0x7d,0x08,0xca,0x01,
1124 0xc4,0xe3,0x7d,0x08,0xca,0x02,
1125 0xc4,0xe3,0x7d,0x08,0xca,0x03,
1126 });
1127
1128 test_asm(r, [&](A& a) {
1129 A::Label l;
1130 a.label(&l);
1131 a.byte(1);
1132 a.byte(2);
1133 a.byte(3);
1134 a.byte(4);
1135
1136 a.vbroadcastss(A::ymm0 , &l);
1137 a.vbroadcastss(A::ymm1 , &l);
1138 a.vbroadcastss(A::ymm8 , &l);
1139 a.vbroadcastss(A::ymm15, &l);
1140
1141 a.vpshufb(A::ymm4, A::ymm3, &l);
1142 a.vpaddd (A::ymm4, A::ymm3, &l);
1143 a.vpsubd (A::ymm4, A::ymm3, &l);
1144
1145 a.vptest(A::ymm4, &l);
1146
1147 a.vmulps (A::ymm4, A::ymm3, &l);
1148 },{
1149 0x01, 0x02, 0x03, 0x4,
1150
1151 /* VEX */ /*op*/ /* ModRM */ /* offset */
1152 0xc4, 0xe2, 0x7d, 0x18, 0b00'000'101, 0xf3,0xff,0xff,0xff, // 0xfffffff3 == -13
1153 0xc4, 0xe2, 0x7d, 0x18, 0b00'001'101, 0xea,0xff,0xff,0xff, // 0xffffffea == -22
1154 0xc4, 0x62, 0x7d, 0x18, 0b00'000'101, 0xe1,0xff,0xff,0xff, // 0xffffffe1 == -31
1155 0xc4, 0x62, 0x7d, 0x18, 0b00'111'101, 0xd8,0xff,0xff,0xff, // 0xffffffd8 == -40
1156
1157 0xc4, 0xe2, 0x65, 0x00, 0b00'100'101, 0xcf,0xff,0xff,0xff, // 0xffffffcf == -49
1158
1159 0xc5, 0xe5, 0xfe, 0b00'100'101, 0xc7,0xff,0xff,0xff, // 0xffffffc7 == -57
1160 0xc5, 0xe5, 0xfa, 0b00'100'101, 0xbf,0xff,0xff,0xff, // 0xffffffbf == -65
1161
1162 0xc4, 0xe2, 0x7d, 0x17, 0b00'100'101, 0xb6,0xff,0xff,0xff, // 0xffffffb6 == -74
1163
1164 0xc5, 0xe4, 0x59, 0b00'100'101, 0xae,0xff,0xff,0xff, // 0xffffffae == -82
1165 });
1166
1167 test_asm(r, [&](A& a) {
1168 a.vbroadcastss(A::ymm0, A::Mem{A::rdi, 0});
1169 a.vbroadcastss(A::ymm13, A::Mem{A::r14, 7});
1170 a.vbroadcastss(A::ymm8, A::Mem{A::rdx, -12});
1171 a.vbroadcastss(A::ymm8, A::Mem{A::rdx, 400});
1172
1173 a.vbroadcastss(A::ymm8, A::xmm0);
1174 a.vbroadcastss(A::ymm0, A::xmm13);
1175 },{
1176 /* VEX */ /*op*/ /*ModRM*/ /*offset*/
1177 0xc4,0xe2,0x7d, 0x18, 0b00'000'111,
1178 0xc4,0x42,0x7d, 0x18, 0b01'101'110, 0x07,
1179 0xc4,0x62,0x7d, 0x18, 0b01'000'010, 0xf4,
1180 0xc4,0x62,0x7d, 0x18, 0b10'000'010, 0x90,0x01,0x00,0x00,
1181
1182 0xc4,0x62,0x7d, 0x18, 0b11'000'000,
1183 0xc4,0xc2,0x7d, 0x18, 0b11'000'101,
1184 });
1185
1186 test_asm(r, [&](A& a) {
1187 A::Label l;
1188 a.label(&l);
1189 a.jne(&l);
1190 a.jne(&l);
1191 a.je (&l);
1192 a.jmp(&l);
1193 a.jl (&l);
1194 a.jc (&l);
1195
1196 a.cmp(A::rdx, 1);
1197 a.cmp(A::rax, 12);
1198 a.cmp(A::r14, 2000000000);
1199 },{
1200 0x0f,0x85, 0xfa,0xff,0xff,0xff, // near jne -6 bytes
1201 0x0f,0x85, 0xf4,0xff,0xff,0xff, // near jne -12 bytes
1202 0x0f,0x84, 0xee,0xff,0xff,0xff, // near je -18 bytes
1203 0xe9, 0xe9,0xff,0xff,0xff, // near jmp -23 bytes
1204 0x0f,0x8c, 0xe3,0xff,0xff,0xff, // near jl -29 bytes
1205 0x0f,0x82, 0xdd,0xff,0xff,0xff, // near jc -35 bytes
1206
1207 0x48,0x83,0xfa,0x01,
1208 0x48,0x83,0xf8,0x0c,
1209 0x49,0x81,0xfe,0x00,0x94,0x35,0x77,
1210 });
1211
1212 test_asm(r, [&](A& a) {
1213 a.vmovups(A::ymm5, A::Mem{A::rsi});
1214 a.vmovups(A::Mem{A::rsi}, A::ymm5);
1215
1216 a.vmovups(A::xmm5, A::Mem{A::rsi});
1217 a.vmovups(A::Mem{A::rsi}, A::xmm5);
1218
1219 a.vpmovzxwd(A::ymm4, A::Mem{A::rsi});
1220 a.vpmovzxbd(A::ymm4, A::Mem{A::rsi});
1221
1222 a.vmovq(A::Mem{A::rdx}, A::xmm15);
1223 },{
1224 /* VEX */ /*Op*/ /* ModRM */
1225 0xc5, 0xfc, 0x10, 0b00'101'110,
1226 0xc5, 0xfc, 0x11, 0b00'101'110,
1227
1228 0xc5, 0xf8, 0x10, 0b00'101'110,
1229 0xc5, 0xf8, 0x11, 0b00'101'110,
1230
1231 0xc4,0xe2,0x7d, 0x33, 0b00'100'110,
1232 0xc4,0xe2,0x7d, 0x31, 0b00'100'110,
1233
1234 0xc5, 0x79, 0xd6, 0b00'111'010,
1235 });
1236
1237 test_asm(r, [&](A& a) {
1238 a.vmovups(A::ymm5, A::Mem{A::rsp, 0});
1239 a.vmovups(A::ymm5, A::Mem{A::rsp, 64});
1240 a.vmovups(A::ymm5, A::Mem{A::rsp,128});
1241
1242 a.vmovups(A::Mem{A::rsp, 0}, A::ymm5);
1243 a.vmovups(A::Mem{A::rsp, 64}, A::ymm5);
1244 a.vmovups(A::Mem{A::rsp,128}, A::ymm5);
1245 },{
1246 0xc5,0xfc,0x10,0x2c,0x24,
1247 0xc5,0xfc,0x10,0x6c,0x24,0x40,
1248 0xc5,0xfc,0x10,0xac,0x24,0x80,0x00,0x00,0x00,
1249
1250 0xc5,0xfc,0x11,0x2c,0x24,
1251 0xc5,0xfc,0x11,0x6c,0x24,0x40,
1252 0xc5,0xfc,0x11,0xac,0x24,0x80,0x00,0x00,0x00,
1253 });
1254
1255 test_asm(r, [&](A& a) {
1256 a.movzbq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst.
1257 a.movzbq(A::rax, A::Mem{A::r8,}); // High src register.
1258 a.movzbq(A::r8 , A::Mem{A::rsi}); // High dst register.
1259 a.movzbq(A::r8, A::Mem{A::rsi, 12});
1260 a.movzbq(A::r8, A::Mem{A::rsi, 400});
1261
1262 a.movzwq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst.
1263 a.movzwq(A::rax, A::Mem{A::r8,}); // High src register.
1264 a.movzwq(A::r8 , A::Mem{A::rsi}); // High dst register.
1265 a.movzwq(A::r8, A::Mem{A::rsi, 12});
1266 a.movzwq(A::r8, A::Mem{A::rsi, 400});
1267
1268 a.vmovd(A::Mem{A::rax}, A::xmm0);
1269 a.vmovd(A::Mem{A::rax}, A::xmm8);
1270 a.vmovd(A::Mem{A::r8 }, A::xmm0);
1271
1272 a.vmovd(A::xmm0, A::Mem{A::rax});
1273 a.vmovd(A::xmm8, A::Mem{A::rax});
1274 a.vmovd(A::xmm0, A::Mem{A::r8 });
1275
1276 a.vmovd(A::xmm0 , A::Mem{A::rax, 0, A::rcx, A::FOUR});
1277 a.vmovd(A::xmm15, A::Mem{A::rax, 0, A::r8, A::TWO });
1278 a.vmovd(A::xmm0 , A::Mem{A::r8 , 0, A::rcx});
1279
1280 a.vmovd(A::rax, A::xmm0);
1281 a.vmovd(A::rax, A::xmm8);
1282 a.vmovd(A::r8 , A::xmm0);
1283
1284 a.vmovd(A::xmm0, A::rax);
1285 a.vmovd(A::xmm8, A::rax);
1286 a.vmovd(A::xmm0, A::r8 );
1287
1288 a.movb(A::Mem{A::rdx}, A::rax);
1289 a.movb(A::Mem{A::rdx}, A::r8 );
1290 a.movb(A::Mem{A::r8 }, A::rax);
1291
1292 a.movb(A::rdx, A::Mem{A::rax});
1293 a.movb(A::rdx, A::Mem{A::r8 });
1294 a.movb(A::r8 , A::Mem{A::rax});
1295
1296 a.movb(A::rdx, 12);
1297 a.movb(A::rax, 4);
1298 a.movb(A::r8 , -1);
1299
1300 a.movb(A::Mem{A::rdx}, 12);
1301 a.movb(A::Mem{A::rax}, 4);
1302 a.movb(A::Mem{A::r8 }, -1);
1303 },{
1304 0x48,0x0f,0xb6,0x06, // movzbq (%rsi), %rax
1305 0x49,0x0f,0xb6,0x00,
1306 0x4c,0x0f,0xb6,0x06,
1307 0x4c,0x0f,0xb6,0x46, 12,
1308 0x4c,0x0f,0xb6,0x86, 0x90,0x01,0x00,0x00,
1309
1310 0x48,0x0f,0xb7,0x06, // movzwq (%rsi), %rax
1311 0x49,0x0f,0xb7,0x00,
1312 0x4c,0x0f,0xb7,0x06,
1313 0x4c,0x0f,0xb7,0x46, 12,
1314 0x4c,0x0f,0xb7,0x86, 0x90,0x01,0x00,0x00,
1315
1316 0xc5,0xf9,0x7e,0x00,
1317 0xc5,0x79,0x7e,0x00,
1318 0xc4,0xc1,0x79,0x7e,0x00,
1319
1320 0xc5,0xf9,0x6e,0x00,
1321 0xc5,0x79,0x6e,0x00,
1322 0xc4,0xc1,0x79,0x6e,0x00,
1323
1324 0xc5,0xf9,0x6e,0x04,0x88,
1325 0xc4,0x21,0x79,0x6e,0x3c,0x40,
1326 0xc4,0xc1,0x79,0x6e,0x04,0x08,
1327
1328 0xc5,0xf9,0x7e,0xc0,
1329 0xc5,0x79,0x7e,0xc0,
1330 0xc4,0xc1,0x79,0x7e,0xc0,
1331
1332 0xc5,0xf9,0x6e,0xc0,
1333 0xc5,0x79,0x6e,0xc0,
1334 0xc4,0xc1,0x79,0x6e,0xc0,
1335
1336 0x48 ,0x88, 0x02,
1337 0x4c, 0x88, 0x02,
1338 0x49, 0x88, 0x00,
1339
1340 0x48 ,0x8a, 0x10,
1341 0x49, 0x8a, 0x10,
1342 0x4c, 0x8a, 0x00,
1343
1344 0x48, 0xc6, 0xc2, 0x0c,
1345 0x48, 0xc6, 0xc0, 0x04,
1346 0x49, 0xc6, 0xc0, 0xff,
1347
1348 0x48, 0xc6, 0x02, 0x0c,
1349 0x48, 0xc6, 0x00, 0x04,
1350 0x49, 0xc6, 0x00, 0xff,
1351 });
1352
1353 test_asm(r, [&](A& a) {
1354 a.vpinsrd(A::xmm1, A::xmm8, A::Mem{A::rsi}, 1); // vpinsrd $1, (%rsi), %xmm8, %xmm1
1355 a.vpinsrd(A::xmm8, A::xmm1, A::Mem{A::r8 }, 3); // vpinsrd $3, (%r8), %xmm1, %xmm8;
1356
1357 a.vpinsrw(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrw $4, (%rsi), %xmm8, %xmm1
1358 a.vpinsrw(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinrsw $12, (%r8), %xmm1, %xmm8
1359
1360 a.vpinsrb(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrb $4, (%rsi), %xmm8, %xmm1
1361 a.vpinsrb(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinsrb $12, (%r8), %xmm1, %xmm8
1362
1363 a.vextracti128(A::xmm1, A::ymm8, 1); // vextracti128 $1, %ymm8, %xmm1
1364 a.vextracti128(A::xmm8, A::ymm1, 0); // vextracti128 $0, %ymm1, %xmm8
1365
1366 a.vpextrd(A::Mem{A::rsi}, A::xmm8, 3); // vpextrd $3, %xmm8, (%rsi)
1367 a.vpextrd(A::Mem{A::r8 }, A::xmm1, 2); // vpextrd $2, %xmm1, (%r8)
1368
1369 a.vpextrw(A::Mem{A::rsi}, A::xmm8, 7);
1370 a.vpextrw(A::Mem{A::r8 }, A::xmm1, 15);
1371
1372 a.vpextrb(A::Mem{A::rsi}, A::xmm8, 7);
1373 a.vpextrb(A::Mem{A::r8 }, A::xmm1, 15);
1374 },{
1375 0xc4,0xe3,0x39, 0x22, 0x0e, 1,
1376 0xc4,0x43,0x71, 0x22, 0x00, 3,
1377
1378 0xc5,0xb9, 0xc4, 0x0e, 4,
1379 0xc4,0x41,0x71, 0xc4, 0x00, 12,
1380
1381 0xc4,0xe3,0x39, 0x20, 0x0e, 4,
1382 0xc4,0x43,0x71, 0x20, 0x00, 12,
1383
1384 0xc4,0x63,0x7d,0x39,0xc1, 1,
1385 0xc4,0xc3,0x7d,0x39,0xc8, 0,
1386
1387 0xc4,0x63,0x79,0x16,0x06, 3,
1388 0xc4,0xc3,0x79,0x16,0x08, 2,
1389
1390 0xc4,0x63,0x79, 0x15, 0x06, 7,
1391 0xc4,0xc3,0x79, 0x15, 0x08, 15,
1392
1393 0xc4,0x63,0x79, 0x14, 0x06, 7,
1394 0xc4,0xc3,0x79, 0x14, 0x08, 15,
1395 });
1396
1397 test_asm(r, [&](A& a) {
1398 a.vpandn(A::ymm3, A::ymm12, A::ymm2);
1399 },{
1400 0xc5, 0x9d, 0xdf, 0xda,
1401 });
1402
1403 test_asm(r, [&](A& a) {
1404 A::Label l;
1405 a.vmovdqa(A::ymm3, A::ymm2); // vmovdqa %ymm2 , %ymm3
1406
1407 a.vmovdqa(A::ymm3, A::Mem{A::rsi}); // vmovdqa (%rsi) , %ymm3
1408 a.vmovdqa(A::ymm3, A::Mem{A::rsp}); // vmovdqa (%rsp) , %ymm3
1409 a.vmovdqa(A::ymm3, A::Mem{A::r11}); // vmovdqa (%r11) , %ymm3
1410
1411 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4}); // vmovdqa 4(%rsi) , %ymm3
1412 a.vmovdqa(A::ymm3, A::Mem{A::rsp, 4}); // vmovdqa 4(%rsp) , %ymm3
1413
1414 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::rax, A::EIGHT}); // vmovdqa 4(%rsi,%rax,8), %ymm3
1415 a.vmovdqa(A::ymm3, A::Mem{A::r11, 4, A::rax, A::TWO }); // vmovdqa 4(%r11,%rax,2), %ymm3
1416 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::FOUR }); // vmovdqa 4(%rsi,%r11,4), %ymm3
1417 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::ONE }); // vmovdqa 4(%rsi,%r11,1), %ymm3
1418 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11}); // vmovdqa 4(%rsi,%r11) , %ymm3
1419
1420 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 64, A::r11}); // vmovdqa 64(%rsi,%r11), %ymm3
1421 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 128, A::r11}); // vmovdqa 128(%rsi,%r11), %ymm3
1422 a.vmovdqa(A::ymm3, &l); // vmovdqa 16(%rip) , %ymm3
1423
1424 a.vcvttps2dq(A::ymm3, A::ymm2);
1425 a.vcvtdq2ps (A::ymm3, A::ymm2);
1426 a.vcvtps2dq (A::ymm3, A::ymm2);
1427 a.vsqrtps (A::ymm3, A::ymm2);
1428 a.label(&l);
1429 },{
1430 0xc5,0xfd,0x6f,0xda,
1431
1432 0xc5,0xfd,0x6f,0x1e,
1433 0xc5,0xfd,0x6f,0x1c,0x24,
1434 0xc4,0xc1,0x7d,0x6f,0x1b,
1435
1436 0xc5,0xfd,0x6f,0x5e,0x04,
1437 0xc5,0xfd,0x6f,0x5c,0x24,0x04,
1438
1439 0xc5,0xfd,0x6f,0x5c,0xc6,0x04,
1440 0xc4,0xc1,0x7d,0x6f,0x5c,0x43,0x04,
1441 0xc4,0xa1,0x7d,0x6f,0x5c,0x9e,0x04,
1442 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
1443 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
1444
1445 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x40,
1446 0xc4,0xa1,0x7d,0x6f,0x9c,0x1e,0x80,0x00,0x00,0x00,
1447
1448 0xc5,0xfd,0x6f,0x1d,0x10,0x00,0x00,0x00,
1449
1450 0xc5,0xfe,0x5b,0xda,
1451 0xc5,0xfc,0x5b,0xda,
1452 0xc5,0xfd,0x5b,0xda,
1453 0xc5,0xfc,0x51,0xda,
1454 });
1455
1456 test_asm(r, [&](A& a) {
1457 a.vcvtps2ph(A::xmm3, A::ymm2, A::CURRENT);
1458 a.vcvtps2ph(A::Mem{A::rsi, 32, A::rax, A::EIGHT}, A::ymm5, A::CEIL);
1459
1460 a.vcvtph2ps(A::ymm15, A::Mem{A::rdi, 12, A::r9, A::ONE});
1461 a.vcvtph2ps(A::ymm2, A::xmm3);
1462 },{
1463 0xc4,0xe3,0x7d,0x1d,0xd3,0x04,
1464 0xc4,0xe3,0x7d,0x1d,0x6c,0xc6,0x20,0x02,
1465
1466 0xc4,0x22,0x7d,0x13,0x7c,0x0f,0x0c,
1467 0xc4,0xe2,0x7d,0x13,0xd3,
1468 });
1469
1470 test_asm(r, [&](A& a) {
1471 a.vgatherdps(A::ymm1 , A::FOUR , A::ymm0 , A::rdi, A::ymm2 );
1472 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm1 );
1473 a.vgatherdps(A::ymm10, A::ONE , A::ymm2 , A::rax, A::ymm1 );
1474 a.vgatherdps(A::ymm0 , A::ONE , A::ymm12, A::rax, A::ymm1 );
1475 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::r9 , A::ymm1 );
1476 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm12);
1477 a.vgatherdps(A::ymm0 , A::EIGHT, A::ymm2 , A::rax, A::ymm12);
1478 },{
1479 0xc4,0xe2,0x6d,0x92,0x0c,0x87,
1480 0xc4,0xe2,0x75,0x92,0x04,0x10,
1481 0xc4,0x62,0x75,0x92,0x14,0x10,
1482 0xc4,0xa2,0x75,0x92,0x04,0x20,
1483 0xc4,0xc2,0x75,0x92,0x04,0x11,
1484 0xc4,0xe2,0x1d,0x92,0x04,0x10,
1485 0xc4,0xe2,0x1d,0x92,0x04,0xd0,
1486 });
1487
1488 test_asm(r, [&](A& a) {
1489 a.mov(A::rax, A::Mem{A::rdi, 0});
1490 a.mov(A::rax, A::Mem{A::rdi, 1});
1491 a.mov(A::rax, A::Mem{A::rdi, 512});
1492 a.mov(A::r15, A::Mem{A::r13, 42});
1493 a.mov(A::rax, A::Mem{A::r13, 42});
1494 a.mov(A::r15, A::Mem{A::rax, 42});
1495 a.mov(A::rax, 1);
1496 a.mov(A::rax, A::rcx);
1497 },{
1498 0x48, 0x8b, 0x07,
1499 0x48, 0x8b, 0x47, 0x01,
1500 0x48, 0x8b, 0x87, 0x00,0x02,0x00,0x00,
1501 0x4d, 0x8b, 0x7d, 0x2a,
1502 0x49, 0x8b, 0x45, 0x2a,
1503 0x4c, 0x8b, 0x78, 0x2a,
1504 0x48, 0xc7, 0xc0, 0x01,0x00,0x00,0x00,
1505 0x48, 0x89, 0xc8,
1506 });
1507
1508 // echo "fmul v4.4s, v3.4s, v1.4s" | llvm-mc -show-encoding -arch arm64
1509
1510 test_asm(r, [&](A& a) {
1511 a.and16b(A::v4, A::v3, A::v1);
1512 a.orr16b(A::v4, A::v3, A::v1);
1513 a.eor16b(A::v4, A::v3, A::v1);
1514 a.bic16b(A::v4, A::v3, A::v1);
1515 a.bsl16b(A::v4, A::v3, A::v1);
1516 a.not16b(A::v4, A::v3);
1517
1518 a.add4s(A::v4, A::v3, A::v1);
1519 a.sub4s(A::v4, A::v3, A::v1);
1520 a.mul4s(A::v4, A::v3, A::v1);
1521
1522 a.cmeq4s(A::v4, A::v3, A::v1);
1523 a.cmgt4s(A::v4, A::v3, A::v1);
1524
1525 a.sub8h(A::v4, A::v3, A::v1);
1526 a.mul8h(A::v4, A::v3, A::v1);
1527
1528 a.fadd4s(A::v4, A::v3, A::v1);
1529 a.fsub4s(A::v4, A::v3, A::v1);
1530 a.fmul4s(A::v4, A::v3, A::v1);
1531 a.fdiv4s(A::v4, A::v3, A::v1);
1532 a.fmin4s(A::v4, A::v3, A::v1);
1533 a.fmax4s(A::v4, A::v3, A::v1);
1534
1535 a.fneg4s (A::v4, A::v3);
1536 a.fsqrt4s(A::v4, A::v3);
1537
1538 a.fmla4s(A::v4, A::v3, A::v1);
1539 a.fmls4s(A::v4, A::v3, A::v1);
1540
1541 a.fcmeq4s(A::v4, A::v3, A::v1);
1542 a.fcmgt4s(A::v4, A::v3, A::v1);
1543 a.fcmge4s(A::v4, A::v3, A::v1);
1544 },{
1545 0x64,0x1c,0x21,0x4e,
1546 0x64,0x1c,0xa1,0x4e,
1547 0x64,0x1c,0x21,0x6e,
1548 0x64,0x1c,0x61,0x4e,
1549 0x64,0x1c,0x61,0x6e,
1550 0x64,0x58,0x20,0x6e,
1551
1552 0x64,0x84,0xa1,0x4e,
1553 0x64,0x84,0xa1,0x6e,
1554 0x64,0x9c,0xa1,0x4e,
1555
1556 0x64,0x8c,0xa1,0x6e,
1557 0x64,0x34,0xa1,0x4e,
1558
1559 0x64,0x84,0x61,0x6e,
1560 0x64,0x9c,0x61,0x4e,
1561
1562 0x64,0xd4,0x21,0x4e,
1563 0x64,0xd4,0xa1,0x4e,
1564 0x64,0xdc,0x21,0x6e,
1565 0x64,0xfc,0x21,0x6e,
1566 0x64,0xf4,0xa1,0x4e,
1567 0x64,0xf4,0x21,0x4e,
1568
1569 0x64,0xf8,0xa0,0x6e,
1570 0x64,0xf8,0xa1,0x6e,
1571
1572 0x64,0xcc,0x21,0x4e,
1573 0x64,0xcc,0xa1,0x4e,
1574
1575 0x64,0xe4,0x21,0x4e,
1576 0x64,0xe4,0xa1,0x6e,
1577 0x64,0xe4,0x21,0x6e,
1578 });
1579
1580 test_asm(r, [&](A& a) {
1581 a.shl4s(A::v4, A::v3, 0);
1582 a.shl4s(A::v4, A::v3, 1);
1583 a.shl4s(A::v4, A::v3, 8);
1584 a.shl4s(A::v4, A::v3, 16);
1585 a.shl4s(A::v4, A::v3, 31);
1586
1587 a.sshr4s(A::v4, A::v3, 1);
1588 a.sshr4s(A::v4, A::v3, 8);
1589 a.sshr4s(A::v4, A::v3, 31);
1590
1591 a.ushr4s(A::v4, A::v3, 1);
1592 a.ushr4s(A::v4, A::v3, 8);
1593 a.ushr4s(A::v4, A::v3, 31);
1594
1595 a.ushr8h(A::v4, A::v3, 1);
1596 a.ushr8h(A::v4, A::v3, 8);
1597 a.ushr8h(A::v4, A::v3, 15);
1598 },{
1599 0x64,0x54,0x20,0x4f,
1600 0x64,0x54,0x21,0x4f,
1601 0x64,0x54,0x28,0x4f,
1602 0x64,0x54,0x30,0x4f,
1603 0x64,0x54,0x3f,0x4f,
1604
1605 0x64,0x04,0x3f,0x4f,
1606 0x64,0x04,0x38,0x4f,
1607 0x64,0x04,0x21,0x4f,
1608
1609 0x64,0x04,0x3f,0x6f,
1610 0x64,0x04,0x38,0x6f,
1611 0x64,0x04,0x21,0x6f,
1612
1613 0x64,0x04,0x1f,0x6f,
1614 0x64,0x04,0x18,0x6f,
1615 0x64,0x04,0x11,0x6f,
1616 });
1617
1618 test_asm(r, [&](A& a) {
1619 a.sli4s(A::v4, A::v3, 0);
1620 a.sli4s(A::v4, A::v3, 1);
1621 a.sli4s(A::v4, A::v3, 8);
1622 a.sli4s(A::v4, A::v3, 16);
1623 a.sli4s(A::v4, A::v3, 31);
1624 },{
1625 0x64,0x54,0x20,0x6f,
1626 0x64,0x54,0x21,0x6f,
1627 0x64,0x54,0x28,0x6f,
1628 0x64,0x54,0x30,0x6f,
1629 0x64,0x54,0x3f,0x6f,
1630 });
1631
1632 test_asm(r, [&](A& a) {
1633 a.scvtf4s (A::v4, A::v3);
1634 a.fcvtzs4s(A::v4, A::v3);
1635 a.fcvtns4s(A::v4, A::v3);
1636 a.frintp4s(A::v4, A::v3);
1637 a.frintm4s(A::v4, A::v3);
1638 a.fcvtn (A::v4, A::v3);
1639 a.fcvtl (A::v4, A::v3);
1640 },{
1641 0x64,0xd8,0x21,0x4e,
1642 0x64,0xb8,0xa1,0x4e,
1643 0x64,0xa8,0x21,0x4e,
1644 0x64,0x88,0xa1,0x4e,
1645 0x64,0x98,0x21,0x4e,
1646 0x64,0x68,0x21,0x0e,
1647 0x64,0x78,0x21,0x0e,
1648 });
1649
1650 test_asm(r, [&](A& a) {
1651 a.sub (A::sp, A::sp, 32); // sub sp, sp, #32
1652 a.strq(A::v0, A::sp, 1); // str q0, [sp, #16]
1653 a.strq(A::v1, A::sp); // str q1, [sp]
1654 a.strd(A::v0, A::sp, 6); // str s0, [sp, #48]
1655 a.strs(A::v0, A::sp, 6); // str s0, [sp, #24]
1656 a.strh(A::v0, A::sp, 10); // str h0, [sp, #20]
1657 a.strb(A::v0, A::sp, 47); // str b0, [sp, #47]
1658 a.ldrb(A::v9, A::sp, 42); // ldr b9, [sp, #42]
1659 a.ldrh(A::v9, A::sp, 47); // ldr h9, [sp, #94]
1660 a.ldrs(A::v7, A::sp, 10); // ldr s7, [sp, #40]
1661 a.ldrd(A::v7, A::sp, 1); // ldr d7, [sp, #8]
1662 a.ldrq(A::v5, A::sp, 128); // ldr q5, [sp, #2048]
1663 a.add (A::sp, A::sp, 32); // add sp, sp, #32
1664 },{
1665 0xff,0x83,0x00,0xd1,
1666 0xe0,0x07,0x80,0x3d,
1667 0xe1,0x03,0x80,0x3d,
1668 0xe0,0x1b,0x00,0xfd,
1669 0xe0,0x1b,0x00,0xbd,
1670 0xe0,0x2b,0x00,0x7d,
1671 0xe0,0xbf,0x00,0x3d,
1672 0xe9,0xab,0x40,0x3d,
1673 0xe9,0xbf,0x40,0x7d,
1674 0xe7,0x2b,0x40,0xbd,
1675 0xe7,0x07,0x40,0xfd,
1676 0xe5,0x03,0xc2,0x3d,
1677 0xff,0x83,0x00,0x91,
1678 });
1679
1680 test_asm(r, [&](A& a) {
1681 a.brk(0);
1682 a.brk(65535);
1683
1684 a.ret(A::x30); // Conventional ret using link register.
1685 a.ret(A::x13); // Can really return using any register if we like.
1686
1687 a.add(A::x2, A::x2, 4);
1688 a.add(A::x3, A::x2, 32);
1689
1690 a.sub(A::x2, A::x2, 4);
1691 a.sub(A::x3, A::x2, 32);
1692
1693 a.subs(A::x2, A::x2, 4);
1694 a.subs(A::x3, A::x2, 32);
1695
1696 a.subs(A::xzr, A::x2, 4); // These are actually the same instruction!
1697 a.cmp(A::x2, 4);
1698
1699 A::Label l;
1700 a.label(&l);
1701 a.bne(&l);
1702 a.bne(&l);
1703 a.blt(&l);
1704 a.b(&l);
1705 a.cbnz(A::x2, &l);
1706 a.cbz(A::x2, &l);
1707
1708 a.add(A::x3, A::x2, A::x1); // add x3,x2,x1
1709 a.add(A::x3, A::x2, A::x1, A::ASR, 3); // add x3,x2,x1, asr #3
1710 },{
1711 0x00,0x00,0x20,0xd4,
1712 0xe0,0xff,0x3f,0xd4,
1713
1714 0xc0,0x03,0x5f,0xd6,
1715 0xa0,0x01,0x5f,0xd6,
1716
1717 0x42,0x10,0x00,0x91,
1718 0x43,0x80,0x00,0x91,
1719
1720 0x42,0x10,0x00,0xd1,
1721 0x43,0x80,0x00,0xd1,
1722
1723 0x42,0x10,0x00,0xf1,
1724 0x43,0x80,0x00,0xf1,
1725
1726 0x5f,0x10,0x00,0xf1,
1727 0x5f,0x10,0x00,0xf1,
1728
1729 0x01,0x00,0x00,0x54, // b.ne #0
1730 0xe1,0xff,0xff,0x54, // b.ne #-4
1731 0xcb,0xff,0xff,0x54, // b.lt #-8
1732 0xae,0xff,0xff,0x54, // b.al #-12
1733 0x82,0xff,0xff,0xb5, // cbnz x2, #-16
1734 0x62,0xff,0xff,0xb4, // cbz x2, #-20
1735
1736 0x43,0x00,0x01,0x8b,
1737 0x43,0x0c,0x81,0x8b,
1738 });
1739
1740 // Can we cbz() to a not-yet-defined label?
1741 test_asm(r, [&](A& a) {
1742 A::Label l;
1743 a.cbz(A::x2, &l);
1744 a.add(A::x3, A::x2, 32);
1745 a.label(&l);
1746 a.ret(A::x30);
1747 },{
1748 0x42,0x00,0x00,0xb4, // cbz x2, #8
1749 0x43,0x80,0x00,0x91, // add x3, x2, #32
1750 0xc0,0x03,0x5f,0xd6, // ret
1751 });
1752
1753 // If we start a label as a backward label,
1754 // can we redefine it to be a future label?
1755 // (Not sure this is useful... just want to test it works.)
1756 test_asm(r, [&](A& a) {
1757 A::Label l1;
1758 a.label(&l1);
1759 a.add(A::x3, A::x2, 32);
1760 a.cbz(A::x2, &l1); // This will jump backward... nothing sneaky.
1761
1762 A::Label l2; // Start off the same...
1763 a.label(&l2);
1764 a.add(A::x3, A::x2, 32);
1765 a.cbz(A::x2, &l2); // Looks like this will go backward...
1766 a.add(A::x2, A::x2, 4);
1767 a.add(A::x3, A::x2, 32);
1768 a.label(&l2); // But no... actually forward! What a switcheroo!
1769 },{
1770 0x43,0x80,0x00,0x91, // add x3, x2, #32
1771 0xe2,0xff,0xff,0xb4, // cbz x2, #-4
1772
1773 0x43,0x80,0x00,0x91, // add x3, x2, #32
1774 0x62,0x00,0x00,0xb4, // cbz x2, #12
1775 0x42,0x10,0x00,0x91, // add x2, x2, #4
1776 0x43,0x80,0x00,0x91, // add x3, x2, #32
1777 });
1778
1779 // Loading from a label on ARM.
1780 test_asm(r, [&](A& a) {
1781 A::Label fore,aft;
1782 a.label(&fore);
1783 a.word(0x01234567);
1784 a.ldrq(A::v1, &fore);
1785 a.ldrq(A::v2, &aft);
1786 a.label(&aft);
1787 a.word(0x76543210);
1788 },{
1789 0x67,0x45,0x23,0x01,
1790 0xe1,0xff,0xff,0x9c, // ldr q1, #-4
1791 0x22,0x00,0x00,0x9c, // ldr q2, #4
1792 0x10,0x32,0x54,0x76,
1793 });
1794
1795 test_asm(r, [&](A& a) {
1796 a.ldrq(A::v0, A::x8);
1797 a.strq(A::v0, A::x8);
1798 },{
1799 0x00,0x01,0xc0,0x3d,
1800 0x00,0x01,0x80,0x3d,
1801 });
1802
1803 test_asm(r, [&](A& a) {
1804 a.dup4s (A::v0, A::x8);
1805 a.ld1r4s (A::v0, A::x8); // echo 'ld1r.4s {v0}, [x8]' | llvm-mc --show-encoding
1806 a.ld1r8h (A::v0, A::x8);
1807 a.ld1r16b(A::v0, A::x8);
1808 },{
1809 0x00,0x0d,0x04,0x4e,
1810 0x00,0xc9,0x40,0x4d,
1811 0x00,0xc5,0x40,0x4d,
1812 0x00,0xc1,0x40,0x4d,
1813 });
1814
1815 test_asm(r, [&](A& a) {
1816 a.ld24s(A::v0, A::x8); // echo 'ld2.4s {v0,v1}, [x8]' | llvm-mc --show-encoding
1817 a.ld44s(A::v0, A::x8);
1818 a.st24s(A::v0, A::x8);
1819 a.st44s(A::v0, A::x8); // echo 'st4.4s {v0,v1,v2,v3}, [x8]' | llvm-mc --show-encoding
1820
1821 a.ld24s(A::v0, A::x8, 0); //echo 'ld2 {v0.s,v1.s}[0], [x8]' | llvm-mc --show-encoding
1822 a.ld24s(A::v0, A::x8, 1);
1823 a.ld24s(A::v0, A::x8, 2);
1824 a.ld24s(A::v0, A::x8, 3);
1825
1826 a.ld44s(A::v0, A::x8, 0); // ld4 {v0.s,v1.s,v2.s,v3.s}[0], [x8]
1827 a.ld44s(A::v0, A::x8, 1);
1828 a.ld44s(A::v0, A::x8, 2);
1829 a.ld44s(A::v0, A::x8, 3);
1830 },{
1831 0x00,0x89,0x40,0x4c,
1832 0x00,0x09,0x40,0x4c,
1833 0x00,0x89,0x00,0x4c,
1834 0x00,0x09,0x00,0x4c,
1835
1836 0x00,0x81,0x60,0x0d,
1837 0x00,0x91,0x60,0x0d,
1838 0x00,0x81,0x60,0x4d,
1839 0x00,0x91,0x60,0x4d,
1840
1841 0x00,0xa1,0x60,0x0d,
1842 0x00,0xb1,0x60,0x0d,
1843 0x00,0xa1,0x60,0x4d,
1844 0x00,0xb1,0x60,0x4d,
1845 });
1846
1847 test_asm(r, [&](A& a) {
1848 a.xtns2h(A::v0, A::v0);
1849 a.xtnh2b(A::v0, A::v0);
1850 a.strs (A::v0, A::x0);
1851
1852 a.ldrs (A::v0, A::x0);
1853 a.uxtlb2h(A::v0, A::v0);
1854 a.uxtlh2s(A::v0, A::v0);
1855
1856 a.uminv4s(A::v3, A::v4);
1857 a.movs (A::x3, A::v4,0); // mov.s w3,v4[0]
1858 a.movs (A::x3, A::v4,1); // mov.s w3,v4[1]
1859 a.inss (A::v4, A::x3,3); // ins.s v4[3],w3
1860 },{
1861 0x00,0x28,0x61,0x0e,
1862 0x00,0x28,0x21,0x0e,
1863 0x00,0x00,0x00,0xbd,
1864
1865 0x00,0x00,0x40,0xbd,
1866 0x00,0xa4,0x08,0x2f,
1867 0x00,0xa4,0x10,0x2f,
1868
1869 0x83,0xa8,0xb1,0x6e,
1870 0x83,0x3c,0x04,0x0e,
1871 0x83,0x3c,0x0c,0x0e,
1872 0x64,0x1c,0x1c,0x4e,
1873 });
1874
1875 test_asm(r, [&](A& a) {
1876 a.ldrb(A::v0, A::x8);
1877 a.strb(A::v0, A::x8);
1878 },{
1879 0x00,0x01,0x40,0x3d,
1880 0x00,0x01,0x00,0x3d,
1881 });
1882
1883 test_asm(r, [&](A& a) {
1884 a.ldrd(A::x0, A::x1, 3); // ldr x0, [x1, #24]
1885 a.ldrs(A::x0, A::x1, 3); // ldr w0, [x1, #12]
1886 a.ldrh(A::x0, A::x1, 3); // ldrh w0, [x1, #6]
1887 a.ldrb(A::x0, A::x1, 3); // ldrb w0, [x1, #3]
1888
1889 a.strs(A::x0, A::x1, 3); // str w0, [x1, #12]
1890 },{
1891 0x20,0x0c,0x40,0xf9,
1892 0x20,0x0c,0x40,0xb9,
1893 0x20,0x0c,0x40,0x79,
1894 0x20,0x0c,0x40,0x39,
1895
1896 0x20,0x0c,0x00,0xb9,
1897 });
1898
1899 test_asm(r, [&](A& a) {
1900 a.tbl (A::v0, A::v1, A::v2);
1901 a.uzp14s(A::v0, A::v1, A::v2);
1902 a.uzp24s(A::v0, A::v1, A::v2);
1903 a.zip14s(A::v0, A::v1, A::v2);
1904 a.zip24s(A::v0, A::v1, A::v2);
1905 },{
1906 0x20,0x00,0x02,0x4e,
1907 0x20,0x18,0x82,0x4e,
1908 0x20,0x58,0x82,0x4e,
1909 0x20,0x38,0x82,0x4e,
1910 0x20,0x78,0x82,0x4e,
1911 });
1912 }
1913
DEF_TEST(SkVM_approx_math, r) {
    // Build a one-varying skvm program that applies fn() to each of the N
    // floats in values[], overwriting values[] in place.
    auto eval = [](int N, float values[], auto fn) {
        skvm::Builder b;
        skvm::Ptr inout  = b.varying<float>();

        b.storeF(inout, fn(&b, b.loadF(inout)));

        b.done().eval(N, values);
    };

    // Assert each values[i] is within 0.001 of expected[i].
    auto compare = [r](int N, const float values[], const float expected[]) {
        for (int i = 0; i < N; ++i) {
            REPORTER_ASSERT(r, SkScalarNearlyEqual(values[i], expected[i], 0.001f));
        }
    };

    // log2
    {
        float values[] = {0.25f, 0.5f, 1, 2, 4, 8};
        constexpr int N = SK_ARRAY_COUNT(values);
        eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
            return b->approx_log2(v);
        });
        const float expected[] = {-2, -1, 0, 1, 2, 3};
        compare(N, values, expected);
    }

    // pow2
    {
        float values[] = {-2, -1, 0, 1, 2, 3};
        constexpr int N = SK_ARRAY_COUNT(values);
        eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
            return b->approx_pow2(v);
        });
        const float expected[] = {0.25f, 0.5f, 1, 2, 4, 8};
        compare(N, values, expected);
    }

    // powf -- x^0.5
    {
        float bases[] = {0, 1, 4, 9, 16};
        constexpr int N = SK_ARRAY_COUNT(bases);
        eval(N, bases, [](skvm::Builder* b, skvm::F32 base) {
            return b->approx_powf(base, b->splat(0.5f));
        });
        const float expected[] = {0, 1, 2, 3, 4};
        compare(N, bases, expected);
    }
    // powf -- 3^x
    {
        float exps[] = {-2, -1, 0, 1, 2};
        constexpr int N = SK_ARRAY_COUNT(exps);
        eval(N, exps, [](skvm::Builder* b, skvm::F32 exp) {
            return b->approx_powf(b->splat(3.0f), exp);
        });
        const float expected[] = {1/9.0f, 1/3.0f, 1, 3, 9};
        compare(N, exps, expected);
    }

    // Run prog() on a single float arg and return |actual - expected|,
    // reporting a failure when the error exceeds tolerance.
    auto test = [r](float arg, float expected, float tolerance, auto prog) {
        skvm::Builder b;
        skvm::Ptr inout  = b.varying<float>();
        b.storeF(inout, prog(b.loadF(inout)));
        float actual = arg;
        b.done().eval(1, &actual);

        float err = std::abs(actual - expected);

        if (err > tolerance) {
    //      SkDebugf("arg %g, expected %g, actual %g\n", arg, expected, actual);
            // Was REPORTER_ASSERT(r, true), a tautology that could never fail;
            // the out-of-tolerance branch must actually report a failure.
            REPORTER_ASSERT(r, false);
        }
        return err;
    };

    // Two-argument version of test(): feed (arg0, arg1) through prog() and
    // check the single output against expected within tolerance.
    auto test2 = [r](float arg0, float arg1, float expected, float tolerance, auto prog) {
        skvm::Builder b;
        skvm::Ptr in0 = b.varying<float>();
        skvm::Ptr in1 = b.varying<float>();
        skvm::Ptr out = b.varying<float>();
        b.storeF(out, prog(b.loadF(in0), b.loadF(in1)));
        float actual;
        b.done().eval(1, &arg0, &arg1, &actual);

        float err = std::abs(actual - expected);

        if (err > tolerance) {
    //      SkDebugf("[%g, %g]: expected %g, actual %g\n", arg0, arg1, expected, actual);
            // Same fix as in test(): assert failure, not the no-op 'true'.
            REPORTER_ASSERT(r, false);
        }
        return err;
    };

    // sine, cosine, tangent
    {
        constexpr float P = SK_ScalarPI;
        constexpr float tol = 0.00175f;
        for (float rad = -5*P; rad <= 5*P; rad += 0.1f) {
            test(rad, sk_float_sin(rad), tol, [](skvm::F32 x) {
                return approx_sin(x);
            });
            test(rad, sk_float_cos(rad), tol, [](skvm::F32 x) {
                return approx_cos(x);
            });
        }

        // Our tangent diverge more as we get near infinities (x near +- Pi/2),
        // so bring in the domain a little.
        constexpr float eps = 0.16f;
        float err = 0;
        for (float rad = -P/2 + eps; rad <= P/2 - eps; rad += 0.01f) {
            err += test(rad, sk_float_tan(rad), tol, [](skvm::F32 x) {
                return approx_tan(x);
            });
            // try again with some multiples of P, to check our periodicity
            test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
                return approx_tan(x + 3*P);
            });
            test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
                return approx_tan(x - 3*P);
            });
        }
        if (0) { SkDebugf("tan error %g\n", err); }
    }

    // asin, acos, atan
    {
        constexpr float tol = 0.00175f;
        float err = 0;
        for (float x = -1; x <= 1; x += 1.0f/64) {
            err += test(x, asin(x), tol, [](skvm::F32 x) {
                return approx_asin(x);
            });
            test(x, acos(x), tol, [](skvm::F32 x) {
                return approx_acos(x);
            });
        }
        if (0) { SkDebugf("asin error %g\n", err); }

        err = 0;
        for (float x = -10; x <= 10; x += 1.0f/16) {
            err += test(x, atan(x), tol, [](skvm::F32 x) {
                return approx_atan(x);
            });
        }
        if (0) { SkDebugf("atan error %g\n", err); }

        for (float y = -3; y <= 3; y += 1) {
            for (float x = -3; x <= 3; x += 1) {
                err += test2(y, x, atan2(y,x), tol, [](skvm::F32 y, skvm::F32 x) {
                    return approx_atan2(y,x);
                });
            }
        }
        if (0) { SkDebugf("atan2 error %g\n", err); }
    }
}
2071
DEF_TEST(SkVM_min_max, r) {
    // min() and max() have subtle behavior when one argument is NaN and
    // the other isn't. It's not sound to blindly swap their arguments.
    //
    // Every backend must match std::min() and std::max() exactly, i.e.
    //
    //    min(x,y) = y<x ? y : x
    //    max(x,y) = x<y ? y : x

    // Bit patterns for ±NaN, ±0, ±1, ±inf.
    const uint32_t kBits[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
                              0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};

    float vals[8];
    memcpy(vals, kBits, sizeof(kBits));

    // Exact bit-level equality, so NaN payloads and -0.0f vs +0.0f are distinguished.
    auto bit_equal = [](float a, float b) {
        uint32_t A,B;
        memcpy(&A, &a, 4);
        memcpy(&B, &b, 4);
        return A == B;
    };

    // Test min/max with non-constant x, non-constant y.
    // (Whether x and y are varying or uniform shouldn't make any difference.)
    {
        skvm::Builder b;
        {
            skvm::Ptr src = b.varying<float>(),
                      mn  = b.varying<float>(),
                      mx  = b.varying<float>();

            skvm::F32 x = b.loadF(src),
                      y = b.uniformF(b.uniform(), 0);

            b.storeF(mn, b.min(x,y));
            b.storeF(mx, b.max(x,y));
        }

        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            float lo[8], hi[8];
            for (int i = 0; i < 8; i++) {
                // min() and max() everything against vals[i] (passed as the uniform).
                program.eval(8, vals,lo,hi, &vals[i]);

                for (int j = 0; j < 8; j++) {
                    REPORTER_ASSERT(r, bit_equal(lo[j], std::min(vals[j], vals[i])));
                    REPORTER_ASSERT(r, bit_equal(hi[j], std::max(vals[j], vals[i])));
                }
            }
        });
    }

    // Test each with constant on the right.
    for (int i = 0; i < 8; i++) {
        skvm::Builder b;
        {
            skvm::Ptr src = b.varying<float>(),
                      mn  = b.varying<float>(),
                      mx  = b.varying<float>();

            skvm::F32 x = b.loadF(src),
                      y = b.splat(vals[i]);

            b.storeF(mn, b.min(x,y));
            b.storeF(mx, b.max(x,y));
        }

        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            float lo[8], hi[8];
            program.eval(8, vals,lo,hi);
            for (int j = 0; j < 8; j++) {
                REPORTER_ASSERT(r, bit_equal(lo[j], std::min(vals[j], vals[i])));
                REPORTER_ASSERT(r, bit_equal(hi[j], std::max(vals[j], vals[i])));
            }
        });
    }

    // Test each with constant on the left.
    for (int i = 0; i < 8; i++) {
        skvm::Builder b;
        {
            skvm::Ptr src = b.varying<float>(),
                      mn  = b.varying<float>(),
                      mx  = b.varying<float>();

            skvm::F32 x = b.splat(vals[i]),
                      y = b.loadF(src);

            b.storeF(mn, b.min(x,y));
            b.storeF(mx, b.max(x,y));
        }

        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            float lo[8], hi[8];
            program.eval(8, vals,lo,hi);
            for (int j = 0; j < 8; j++) {
                REPORTER_ASSERT(r, bit_equal(lo[j], std::min(vals[i], vals[j])));
                REPORTER_ASSERT(r, bit_equal(hi[j], std::max(vals[i], vals[j])));
            }
        });
    }
}
2175
DEF_TEST(SkVM_halfs, r) {
    // Eight half-precision bit patterns and the float values they encode.
    const uint16_t halfBits[] = {0x0000,0x3800,0x3c00,0x4000,
                                 0xc400,0xb800,0xbc00,0xc000};
    const float    floatVals[] = {+0.0f,+0.5f,+1.0f,+2.0f,
                                  -4.0f,-0.5f,-1.0f,-2.0f};

    // Widen: fp16 -> f32.
    {
        skvm::Builder b;
        skvm::Ptr in  = b.varying<uint16_t>(),
                  out = b.varying<float>();
        b.storeF(out, b.from_fp16(b.load16(in)));

        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            float got[8];
            program.eval(8, halfBits, got);
            for (int i = 0; i < 8; i++) {
                REPORTER_ASSERT(r, got[i] == floatVals[i]);
            }
        });
    }

    // Narrow: f32 -> fp16.
    {
        skvm::Builder b;
        skvm::Ptr in  = b.varying<float>(),
                  out = b.varying<uint16_t>();
        b.store16(out, b.to_fp16(b.loadF(in)));

        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            uint16_t got[8];
            program.eval(8, floatVals, got);
            for (int i = 0; i < 8; i++) {
                REPORTER_ASSERT(r, got[i] == halfBits[i]);
            }
        });
    }
}
2210
DEF_TEST(SkVM_64bit, r) {
    // Parallel arrays: wide[i] carries lo[i] in its low 32 bits and hi[i]
    // in its high 32 bits.  (65 entries — likely chosen so both vectorized
    // and tail iterations get exercised; TODO confirm.)
    uint32_t lo[65],
             hi[65];
    uint64_t wide[65];
    for (int i = 0; i < 65; i++) {
        lo[i] = 2*i+0;
        hi[i] = 2*i+1;
        wide[i] = (uint64_t)lo[i] | ((uint64_t)hi[i] << 32);
    }

    // Split each 64-bit value into its two 32-bit halves.
    {
        skvm::Builder b;
        {
            skvm::Ptr in    = b.varying<uint64_t>(),
                      outLo = b.varying<int>(),
                      outHi = b.varying<int>();
            b.store32(outLo, b.load64(in, 0));   // half 0: low bits
            b.store32(outHi, b.load64(in, 1));   // half 1: high bits
        }
        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            uint32_t l[65], h[65];
            program.eval(65, wide,l,h);
            for (int i = 0; i < 65; i++) {
                REPORTER_ASSERT(r, l[i] == lo[i]);
                REPORTER_ASSERT(r, h[i] == hi[i]);
            }
        });
    }

    // Fuse two 32-bit halves back into one 64-bit value.
    {
        skvm::Builder b;
        {
            skvm::Ptr out  = b.varying<uint64_t>(),
                      inLo = b.varying<int>(),
                      inHi = b.varying<int>();
            b.store64(out, b.load32(inLo), b.load32(inHi));
        }
        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            uint64_t w[65];
            program.eval(65, w,lo,hi);
            for (int i = 0; i < 65; i++) {
                REPORTER_ASSERT(r, w[i] == wide[i]);
            }
        });
    }
}
2258
DEF_TEST(SkVM_128bit, r) {
    // 63 RGBA pixels, 4 channels each.  Values i*(1/255) convert to byte i
    // and back exactly, which is what both loops below assert.
    constexpr int N = 63;
    float   floats[4*N];
    uint8_t packed[4*N];

    for (int i = 0; i < 4*N; i++) {
        floats[i] = i * (1/255.0f);
    }

    skvm::PixelFormat rgba_ffff = skvm::SkColorType_to_PixelFormat(kRGBA_F32_SkColorType),
                      rgba_8888 = skvm::SkColorType_to_PixelFormat(kRGBA_8888_SkColorType);

    // RGBA F32 -> RGBA 8888: exercises 128-bit loads.
    {
        skvm::Builder b;
        {
            skvm::Ptr dst = b.arg( 4),
                      src = b.arg(16);
            b.store(rgba_8888, dst, b.load(rgba_ffff, src));
        }
        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            memset(packed, 0, sizeof(packed));
            program.eval(N, packed, floats);
            for (int i = 0; i < 4*N; i++) {
                REPORTER_ASSERT(r, packed[i] == i);
            }
        });
    }

    // RGBA 8888 -> RGBA F32: exercises 128-bit stores.
    // (Reads back the `packed` bytes produced just above.)
    {
        skvm::Builder b;
        {
            skvm::Ptr dst = b.arg(16),
                      src = b.arg( 4);
            b.store(rgba_ffff, dst, b.load(rgba_8888, src));
        }
        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            memset(floats, 0, sizeof(floats));
            program.eval(N, floats, packed);
            for (int i = 0; i < 4*N; i++) {
                REPORTER_ASSERT(r, floats[i] == i * (1/255.0f));
            }
        });
    }
}
2308
DEF_TEST(SkVM_is_NaN_is_finite, r) {
    // Classify every lane twice: once with is_NaN(), once with is_finite().
    skvm::Builder b;
    {
        skvm::Ptr src = b.varying<float>(),
                  nan = b.varying<int>(),
                  fin = b.varying<int>();
        b.store32(nan, is_NaN   (b.loadF(src)));
        b.store32(fin, is_finite(b.loadF(src)));
    }
    test_jit_and_interpreter(b, [&](const skvm::Program& program){
        // Bit patterns for ±NaN, ±0, ±1, ±inf, in that order.
        const uint32_t bits[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
                                 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
        uint32_t nan[8], fin[8];
        program.eval(8, bits, nan,fin);

        for (int i = 0; i < 8; i++) {
            const bool wantNaN    = (i == 0 || i == 1);   // the two NaNs
            const bool wantFinite = (i >= 2 && i <= 5);   // ±0 and ±1
            REPORTER_ASSERT(r, nan[i] == (wantNaN    ? 0xffffffff : 0u));
            REPORTER_ASSERT(r, fin[i] == (wantFinite ? 0xffffffff : 0u));
        }
    });
}
2332
DEF_TEST(SkVM_args, r) {
    // Test we can handle at least six arguments: one output plus five inputs,
    // with dst = A+B+C+D+E summed lane by lane.
    skvm::Builder b;
    {
        skvm::Ptr dst = b.varying<float>(),
                    A = b.varying<float>(),
                    B = b.varying<float>(),
                    C = b.varying<float>(),
                    D = b.varying<float>(),
                    E = b.varying<float>();
        // Use the Builder method (was an unqualified storeF()): every other
        // call in this block goes through `b.`, and qualifying it means we
        // don't rely on skvm's free-function sugar being in scope here.
        b.storeF(dst, b.loadF(A)
                    + b.loadF(B)
                    + b.loadF(C)
                    + b.loadF(D)
                    + b.loadF(E));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program){
        float dst[17],A[17],B[17],C[17],D[17],E[17];
        for (int i = 0; i < 17; i++) {
            A[i] = B[i] = C[i] = D[i] = E[i] = (float)i;
        }
        program.eval(17, dst,A,B,C,D,E);
        for (int i = 0; i < 17; i++) {
            // Each input lane holds i, so the sum is 5i exactly (small ints
            // are exact in float).
            REPORTER_ASSERT(r, dst[i] == 5.0f*i);
        }
    });
}
2361
DEF_TEST(SkVM_badpack, r) {
    // Test case distilled from actual failing draw,
    // originally with a bad arm64 implementation of pack().
    skvm::Builder p;
    {
        // NOTE: the skvm::I32 `r` below shadows the test reporter `r`, but
        // only inside these braces; the lambda further down still captures
        // the reporter.
        skvm::Ptr uniforms = p.uniform(),
                  dst      = p.varying<uint16_t>();

        // uniformF(uniforms, 8) reads the float at byte offset 8, i.e.
        // uniforms[2] == 1.0f in the eval below, so r = round(1*15) = 0xf.
        skvm::I32 r = round(p.uniformF(uniforms, 8) * 15),
                  a = p.splat(0xf);

        // Build a 4444-style value: r in the top nibble, a in the bottom,
        // giving (0xf << 12) | 0xf == 0xf00f.
        skvm::I32 _4444 = p.splat(0);
        _4444 = pack(_4444, r, 12);
        _4444 = pack(_4444, a, 0);
        store16(dst, _4444);
    }

    test_jit_and_interpreter(p, [&](const skvm::Program& program){
        // Only the float at byte offset 8 (the 1.0f) is read by the program.
        const float uniforms[] = { 0.0f, 0.0f,
                                   1.0f, 0.0f, 0.0f, 1.0f };

        uint16_t dst[17] = {0};
        program.eval(17, uniforms,dst);
        for (int i = 0; i < 17; i++) {
            // Every lane should produce the same packed value.
            REPORTER_ASSERT(r, dst[i] == 0xf00f, "got %04x, want %04x\n", dst[i], 0xf00f);
        }
    });
}
2390
DEF_TEST(SkVM_features, r) {
    // A tiny load/(x*x+x)/store program; with FMA the mul and add fuse.
    auto build = [](skvm::Builder* b) {
        skvm::F32 x = b->loadF(b->varying<float>());
        b->storeF(b->varying<float>(), x*x+x);
    };

    { // FMA forced on: load, fma, store.
        skvm::Features caps;
        caps.fma = true;
        skvm::Builder b(caps);
        build(&b);
        REPORTER_ASSERT(r, b.optimize().size() == 3);
    }

    { // FMA forced off: load, mul, add, store.
        skvm::Features caps;
        caps.fma = false;
        skvm::Builder b(caps);
        build(&b);
        REPORTER_ASSERT(r, b.optimize().size() == 4);
    }

    { // Auto-detected features: either form is acceptable.
        skvm::Builder b;
        build(&b);
        const auto n = b.optimize().size();
        REPORTER_ASSERT(r, n == 3 || n == 4);
    }
}
2420
DEF_TEST(SkVM_gather_can_hoist, r) {
    // A gather isn't inherently varying — it inherits variance from its index.

    // Varying index: the gather must stay inside the loop.
    {
        skvm::Builder b;
        skvm::Ptr uniforms = b.uniform(),
                  buf      = b.varying<int>();
        skvm::I32 index = b.load32(buf);
        b.store32(buf, b.gather32(uniforms,0, index));

        skvm::Program program = b.done();

        // All three instructions land in the loop body:
        //
        // loop:
        //     v0 = load32 buf
        //     v1 = gather32 uniforms+0 v0
        //     store32 buf v1
        REPORTER_ASSERT(r, program.instructions().size() == 3);
        REPORTER_ASSERT(r, program.loop() == 0);
    }

    // Uniform index: the gather hoists out of the loop too.
    {
        skvm::Builder b;
        skvm::Ptr uniforms = b.uniform(),
                  buf      = b.varying<int>();
        skvm::I32 index = b.uniform32(uniforms,8);
        b.store32(buf, b.gather32(uniforms,0, index));

        skvm::Program program = b.done();

        // Only the store remains in the loop body:
        //
        //     v0 = uniform32 uniforms+8
        //     v1 = gather32 uniforms+0 v0
        // loop:
        //     store32 buf v1
        REPORTER_ASSERT(r, program.instructions().size() == 3);
        REPORTER_ASSERT(r, program.loop() == 2);
    }
}
2463
DEF_TEST(SkVM_dont_dedup_loads, r) {
    // Identical Ops normally dedup — a simple common-subexpression
    // eliminator — but two identical loads separated by a store see
    // different memory, so they must stay distinct.  If the loads below
    // were dedup'd, each lane would only ever grow by 1 instead of K.
    constexpr int K = 2;
    skvm::Builder b;
    {
        skvm::Ptr buf = b.varying<int>();
        for (int rep = 0; rep < K; rep++) {
            b.store32(buf, b.load32(buf) + 1);
        }
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program){
        int buf[] = { 0,1,2,3,4 };
        program.eval(SK_ARRAY_COUNT(buf), buf);
        for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
            REPORTER_ASSERT(r, buf[i] == i+K);
        }
    });
}
2487
DEF_TEST(SkVM_dont_dedup_stores, r) {
    // Same reasoning as SkVM_dont_dedup_loads, applied to stores: a
    // different store between two identical ones invalidates the first,
    // so the final store really must be reissued.
    skvm::Builder b;
    {
        skvm::Ptr buf = b.varying<int>();
        b.store32(buf, b.splat(4));
        b.store32(buf, b.splat(5));
        b.store32(buf, b.splat(4)); // Dedup'ing would drop this, leaving 5s behind.
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program){
        int buf[42];
        program.eval(SK_ARRAY_COUNT(buf), buf);
        for (size_t i = 0; i < SK_ARRAY_COUNT(buf); i++) {
            REPORTER_ASSERT(r, buf[i] == 4);
        }
    });
}
2508
DEF_TEST(SkVM_fast_mul, r) {
    // fast_mul(0,x) yields 0 for every input below, while a plain 0*x
    // still produces NaN for the inf/NaN inputs.
    skvm::Builder b;
    {
        skvm::Ptr src  = b.varying<float>(),
                  fast = b.varying<float>(),
                  slow = b.varying<float>();
        skvm::F32 x = b.loadF(src);
        b.storeF(fast, fast_mul(0.0f, x));
        b.storeF(slow, 0.0f * x);
    }
    test_jit_and_interpreter(b, [&](const skvm::Program& program){
        const uint32_t bits[] = {
            0x0000'0000, 0x8000'0000, //±0
            0x3f80'0000, 0xbf80'0000, //±1
            0x7f80'0000, 0xff80'0000, //±inf
            0x7f80'0001, 0xff80'0001, //±NaN
        };
        float fast[8],
              slow[8];
        program.eval(8,bits,fast,slow);

        for (int i = 0; i < 8; i++) {
            REPORTER_ASSERT(r, fast[i] == 0.0f);

            // The first four inputs (±0, ±1) multiply cleanly; the last
            // four (±inf, ±NaN) poison the ordinary multiply.
            const bool cleanInput = i < 4;
            if (cleanInput) {
                REPORTER_ASSERT(r, slow[i] == 0.0f);
            } else {
                REPORTER_ASSERT(r, isnan(slow[i]));
            }
        }
    });
}
2541