1 // Copyright 2015, ARM Limited
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #ifdef VIXL_INCLUDE_SIMULATOR
28
29 #include <cmath>
30 #include "vixl/a64/simulator-a64.h"
31
32 namespace vixl {
33
FPDefaultNaN()34 template<> double Simulator::FPDefaultNaN<double>() {
35 return kFP64DefaultNaN;
36 }
37
38
FPDefaultNaN()39 template<> float Simulator::FPDefaultNaN<float>() {
40 return kFP32DefaultNaN;
41 }
42
43 // See FPRound for a description of this function.
FPRoundToDouble(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)44 static inline double FPRoundToDouble(int64_t sign, int64_t exponent,
45 uint64_t mantissa, FPRounding round_mode) {
46 int64_t bits =
47 FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
48 exponent,
49 mantissa,
50 round_mode);
51 return rawbits_to_double(bits);
52 }
53
54
55 // See FPRound for a description of this function.
FPRoundToFloat(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)56 static inline float FPRoundToFloat(int64_t sign, int64_t exponent,
57 uint64_t mantissa, FPRounding round_mode) {
58 int32_t bits =
59 FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
60 exponent,
61 mantissa,
62 round_mode);
63 return rawbits_to_float(bits);
64 }
65
66
67 // See FPRound for a description of this function.
FPRoundToFloat16(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)68 static inline float16 FPRoundToFloat16(int64_t sign,
69 int64_t exponent,
70 uint64_t mantissa,
71 FPRounding round_mode) {
72 return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
73 sign, exponent, mantissa, round_mode);
74 }
75
76
FixedToDouble(int64_t src,int fbits,FPRounding round)77 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
78 if (src >= 0) {
79 return UFixedToDouble(src, fbits, round);
80 } else {
81 // This works for all negative values, including INT64_MIN.
82 return -UFixedToDouble(-src, fbits, round);
83 }
84 }
85
86
UFixedToDouble(uint64_t src,int fbits,FPRounding round)87 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
88 // An input of 0 is a special case because the result is effectively
89 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
90 if (src == 0) {
91 return 0.0;
92 }
93
94 // Calculate the exponent. The highest significant bit will have the value
95 // 2^exponent.
96 const int highest_significant_bit = 63 - CountLeadingZeros(src);
97 const int64_t exponent = highest_significant_bit - fbits;
98
99 return FPRoundToDouble(0, exponent, src, round);
100 }
101
102
FixedToFloat(int64_t src,int fbits,FPRounding round)103 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
104 if (src >= 0) {
105 return UFixedToFloat(src, fbits, round);
106 } else {
107 // This works for all negative values, including INT64_MIN.
108 return -UFixedToFloat(-src, fbits, round);
109 }
110 }
111
112
UFixedToFloat(uint64_t src,int fbits,FPRounding round)113 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
114 // An input of 0 is a special case because the result is effectively
115 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
116 if (src == 0) {
117 return 0.0f;
118 }
119
120 // Calculate the exponent. The highest significant bit will have the value
121 // 2^exponent.
122 const int highest_significant_bit = 63 - CountLeadingZeros(src);
123 const int32_t exponent = highest_significant_bit - fbits;
124
125 return FPRoundToFloat(0, exponent, src, round);
126 }
127
128
FPToDouble(float value)129 double Simulator::FPToDouble(float value) {
130 switch (std::fpclassify(value)) {
131 case FP_NAN: {
132 if (IsSignallingNaN(value)) {
133 FPProcessException();
134 }
135 if (DN()) return kFP64DefaultNaN;
136
137 // Convert NaNs as the processor would:
138 // - The sign is propagated.
139 // - The payload (mantissa) is transferred entirely, except that the top
140 // bit is forced to '1', making the result a quiet NaN. The unused
141 // (low-order) payload bits are set to 0.
142 uint32_t raw = float_to_rawbits(value);
143
144 uint64_t sign = raw >> 31;
145 uint64_t exponent = (1 << 11) - 1;
146 uint64_t payload = unsigned_bitextract_64(21, 0, raw);
147 payload <<= (52 - 23); // The unused low-order bits should be 0.
148 payload |= (UINT64_C(1) << 51); // Force a quiet NaN.
149
150 return rawbits_to_double((sign << 63) | (exponent << 52) | payload);
151 }
152
153 case FP_ZERO:
154 case FP_NORMAL:
155 case FP_SUBNORMAL:
156 case FP_INFINITE: {
157 // All other inputs are preserved in a standard cast, because every value
158 // representable using an IEEE-754 float is also representable using an
159 // IEEE-754 double.
160 return static_cast<double>(value);
161 }
162 }
163
164 VIXL_UNREACHABLE();
165 return static_cast<double>(value);
166 }
167
168
FPToFloat(float16 value)169 float Simulator::FPToFloat(float16 value) {
170 uint32_t sign = value >> 15;
171 uint32_t exponent = unsigned_bitextract_32(
172 kFloat16MantissaBits + kFloat16ExponentBits - 1, kFloat16MantissaBits,
173 value);
174 uint32_t mantissa = unsigned_bitextract_32(
175 kFloat16MantissaBits - 1, 0, value);
176
177 switch (float16classify(value)) {
178 case FP_ZERO:
179 return (sign == 0) ? 0.0f : -0.0f;
180
181 case FP_INFINITE:
182 return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
183
184 case FP_SUBNORMAL: {
185 // Calculate shift required to put mantissa into the most-significant bits
186 // of the destination mantissa.
187 int shift = CountLeadingZeros(mantissa << (32 - 10));
188
189 // Shift mantissa and discard implicit '1'.
190 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
191 mantissa &= (1 << kFloatMantissaBits) - 1;
192
193 // Adjust the exponent for the shift applied, and rebias.
194 exponent = exponent - shift + (-15 + 127);
195 break;
196 }
197
198 case FP_NAN:
199 if (IsSignallingNaN(value)) {
200 FPProcessException();
201 }
202 if (DN()) return kFP32DefaultNaN;
203
204 // Convert NaNs as the processor would:
205 // - The sign is propagated.
206 // - The payload (mantissa) is transferred entirely, except that the top
207 // bit is forced to '1', making the result a quiet NaN. The unused
208 // (low-order) payload bits are set to 0.
209 exponent = (1 << kFloatExponentBits) - 1;
210
211 // Increase bits in mantissa, making low-order bits 0.
212 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
213 mantissa |= 1 << 22; // Force a quiet NaN.
214 break;
215
216 case FP_NORMAL:
217 // Increase bits in mantissa, making low-order bits 0.
218 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
219
220 // Change exponent bias.
221 exponent += (-15 + 127);
222 break;
223
224 default: VIXL_UNREACHABLE();
225 }
226 return rawbits_to_float((sign << 31) |
227 (exponent << kFloatMantissaBits) |
228 mantissa);
229 }
230
231
FPToFloat16(float value,FPRounding round_mode)232 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
233 // Only the FPTieEven rounding mode is implemented.
234 VIXL_ASSERT(round_mode == FPTieEven);
235 USE(round_mode);
236
237 uint32_t raw = float_to_rawbits(value);
238 int32_t sign = raw >> 31;
239 int32_t exponent = unsigned_bitextract_32(30, 23, raw) - 127;
240 uint32_t mantissa = unsigned_bitextract_32(22, 0, raw);
241
242 switch (std::fpclassify(value)) {
243 case FP_NAN: {
244 if (IsSignallingNaN(value)) {
245 FPProcessException();
246 }
247 if (DN()) return kFP16DefaultNaN;
248
249 // Convert NaNs as the processor would:
250 // - The sign is propagated.
251 // - The payload (mantissa) is transferred as much as possible, except
252 // that the top bit is forced to '1', making the result a quiet NaN.
253 float16 result = (sign == 0) ? kFP16PositiveInfinity
254 : kFP16NegativeInfinity;
255 result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
256 result |= (1 << 9); // Force a quiet NaN;
257 return result;
258 }
259
260 case FP_ZERO:
261 return (sign == 0) ? 0 : 0x8000;
262
263 case FP_INFINITE:
264 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
265
266 case FP_NORMAL:
267 case FP_SUBNORMAL: {
268 // Convert float-to-half as the processor would, assuming that FPCR.FZ
269 // (flush-to-zero) is not set.
270
271 // Add the implicit '1' bit to the mantissa.
272 mantissa += (1 << 23);
273 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
274 }
275 }
276
277 VIXL_UNREACHABLE();
278 return 0;
279 }
280
281
FPToFloat16(double value,FPRounding round_mode)282 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
283 // Only the FPTieEven rounding mode is implemented.
284 VIXL_ASSERT(round_mode == FPTieEven);
285 USE(round_mode);
286
287 uint64_t raw = double_to_rawbits(value);
288 int32_t sign = raw >> 63;
289 int64_t exponent = unsigned_bitextract_64(62, 52, raw) - 1023;
290 uint64_t mantissa = unsigned_bitextract_64(51, 0, raw);
291
292 switch (std::fpclassify(value)) {
293 case FP_NAN: {
294 if (IsSignallingNaN(value)) {
295 FPProcessException();
296 }
297 if (DN()) return kFP16DefaultNaN;
298
299 // Convert NaNs as the processor would:
300 // - The sign is propagated.
301 // - The payload (mantissa) is transferred as much as possible, except
302 // that the top bit is forced to '1', making the result a quiet NaN.
303 float16 result = (sign == 0) ? kFP16PositiveInfinity
304 : kFP16NegativeInfinity;
305 result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
306 result |= (1 << 9); // Force a quiet NaN;
307 return result;
308 }
309
310 case FP_ZERO:
311 return (sign == 0) ? 0 : 0x8000;
312
313 case FP_INFINITE:
314 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
315
316 case FP_NORMAL:
317 case FP_SUBNORMAL: {
318 // Convert double-to-half as the processor would, assuming that FPCR.FZ
319 // (flush-to-zero) is not set.
320
321 // Add the implicit '1' bit to the mantissa.
322 mantissa += (UINT64_C(1) << 52);
323 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
324 }
325 }
326
327 VIXL_UNREACHABLE();
328 return 0;
329 }
330
331
FPToFloat(double value,FPRounding round_mode)332 float Simulator::FPToFloat(double value, FPRounding round_mode) {
333 // Only the FPTieEven rounding mode is implemented.
334 VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
335 USE(round_mode);
336
337 switch (std::fpclassify(value)) {
338 case FP_NAN: {
339 if (IsSignallingNaN(value)) {
340 FPProcessException();
341 }
342 if (DN()) return kFP32DefaultNaN;
343
344 // Convert NaNs as the processor would:
345 // - The sign is propagated.
346 // - The payload (mantissa) is transferred as much as possible, except
347 // that the top bit is forced to '1', making the result a quiet NaN.
348 uint64_t raw = double_to_rawbits(value);
349
350 uint32_t sign = raw >> 63;
351 uint32_t exponent = (1 << 8) - 1;
352 uint32_t payload =
353 static_cast<uint32_t>(unsigned_bitextract_64(50, 52 - 23, raw));
354 payload |= (1 << 22); // Force a quiet NaN.
355
356 return rawbits_to_float((sign << 31) | (exponent << 23) | payload);
357 }
358
359 case FP_ZERO:
360 case FP_INFINITE: {
361 // In a C++ cast, any value representable in the target type will be
362 // unchanged. This is always the case for +/-0.0 and infinities.
363 return static_cast<float>(value);
364 }
365
366 case FP_NORMAL:
367 case FP_SUBNORMAL: {
368 // Convert double-to-float as the processor would, assuming that FPCR.FZ
369 // (flush-to-zero) is not set.
370 uint64_t raw = double_to_rawbits(value);
371 // Extract the IEEE-754 double components.
372 uint32_t sign = raw >> 63;
373 // Extract the exponent and remove the IEEE-754 encoding bias.
374 int32_t exponent =
375 static_cast<int32_t>(unsigned_bitextract_64(62, 52, raw)) - 1023;
376 // Extract the mantissa and add the implicit '1' bit.
377 uint64_t mantissa = unsigned_bitextract_64(51, 0, raw);
378 if (std::fpclassify(value) == FP_NORMAL) {
379 mantissa |= (UINT64_C(1) << 52);
380 }
381 return FPRoundToFloat(sign, exponent, mantissa, round_mode);
382 }
383 }
384
385 VIXL_UNREACHABLE();
386 return value;
387 }
388
389
ld1(VectorFormat vform,LogicVRegister dst,uint64_t addr)390 void Simulator::ld1(VectorFormat vform,
391 LogicVRegister dst,
392 uint64_t addr) {
393 dst.ClearForWrite(vform);
394 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
395 dst.ReadUintFromMem(vform, i, addr);
396 addr += LaneSizeInBytesFromFormat(vform);
397 }
398 }
399
400
ld1(VectorFormat vform,LogicVRegister dst,int index,uint64_t addr)401 void Simulator::ld1(VectorFormat vform,
402 LogicVRegister dst,
403 int index,
404 uint64_t addr) {
405 dst.ReadUintFromMem(vform, index, addr);
406 }
407
408
ld1r(VectorFormat vform,LogicVRegister dst,uint64_t addr)409 void Simulator::ld1r(VectorFormat vform,
410 LogicVRegister dst,
411 uint64_t addr) {
412 dst.ClearForWrite(vform);
413 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
414 dst.ReadUintFromMem(vform, i, addr);
415 }
416 }
417
418
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr1)419 void Simulator::ld2(VectorFormat vform,
420 LogicVRegister dst1,
421 LogicVRegister dst2,
422 uint64_t addr1) {
423 dst1.ClearForWrite(vform);
424 dst2.ClearForWrite(vform);
425 int esize = LaneSizeInBytesFromFormat(vform);
426 uint64_t addr2 = addr1 + esize;
427 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
428 dst1.ReadUintFromMem(vform, i, addr1);
429 dst2.ReadUintFromMem(vform, i, addr2);
430 addr1 += 2 * esize;
431 addr2 += 2 * esize;
432 }
433 }
434
435
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,int index,uint64_t addr1)436 void Simulator::ld2(VectorFormat vform,
437 LogicVRegister dst1,
438 LogicVRegister dst2,
439 int index,
440 uint64_t addr1) {
441 dst1.ClearForWrite(vform);
442 dst2.ClearForWrite(vform);
443 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
444 dst1.ReadUintFromMem(vform, index, addr1);
445 dst2.ReadUintFromMem(vform, index, addr2);
446 }
447
448
ld2r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr)449 void Simulator::ld2r(VectorFormat vform,
450 LogicVRegister dst1,
451 LogicVRegister dst2,
452 uint64_t addr) {
453 dst1.ClearForWrite(vform);
454 dst2.ClearForWrite(vform);
455 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
456 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
457 dst1.ReadUintFromMem(vform, i, addr);
458 dst2.ReadUintFromMem(vform, i, addr2);
459 }
460 }
461
462
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr1)463 void Simulator::ld3(VectorFormat vform,
464 LogicVRegister dst1,
465 LogicVRegister dst2,
466 LogicVRegister dst3,
467 uint64_t addr1) {
468 dst1.ClearForWrite(vform);
469 dst2.ClearForWrite(vform);
470 dst3.ClearForWrite(vform);
471 int esize = LaneSizeInBytesFromFormat(vform);
472 uint64_t addr2 = addr1 + esize;
473 uint64_t addr3 = addr2 + esize;
474 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
475 dst1.ReadUintFromMem(vform, i, addr1);
476 dst2.ReadUintFromMem(vform, i, addr2);
477 dst3.ReadUintFromMem(vform, i, addr3);
478 addr1 += 3 * esize;
479 addr2 += 3 * esize;
480 addr3 += 3 * esize;
481 }
482 }
483
484
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr1)485 void Simulator::ld3(VectorFormat vform,
486 LogicVRegister dst1,
487 LogicVRegister dst2,
488 LogicVRegister dst3,
489 int index,
490 uint64_t addr1) {
491 dst1.ClearForWrite(vform);
492 dst2.ClearForWrite(vform);
493 dst3.ClearForWrite(vform);
494 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
495 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
496 dst1.ReadUintFromMem(vform, index, addr1);
497 dst2.ReadUintFromMem(vform, index, addr2);
498 dst3.ReadUintFromMem(vform, index, addr3);
499 }
500
501
ld3r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)502 void Simulator::ld3r(VectorFormat vform,
503 LogicVRegister dst1,
504 LogicVRegister dst2,
505 LogicVRegister dst3,
506 uint64_t addr) {
507 dst1.ClearForWrite(vform);
508 dst2.ClearForWrite(vform);
509 dst3.ClearForWrite(vform);
510 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
511 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
512 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
513 dst1.ReadUintFromMem(vform, i, addr);
514 dst2.ReadUintFromMem(vform, i, addr2);
515 dst3.ReadUintFromMem(vform, i, addr3);
516 }
517 }
518
519
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr1)520 void Simulator::ld4(VectorFormat vform,
521 LogicVRegister dst1,
522 LogicVRegister dst2,
523 LogicVRegister dst3,
524 LogicVRegister dst4,
525 uint64_t addr1) {
526 dst1.ClearForWrite(vform);
527 dst2.ClearForWrite(vform);
528 dst3.ClearForWrite(vform);
529 dst4.ClearForWrite(vform);
530 int esize = LaneSizeInBytesFromFormat(vform);
531 uint64_t addr2 = addr1 + esize;
532 uint64_t addr3 = addr2 + esize;
533 uint64_t addr4 = addr3 + esize;
534 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
535 dst1.ReadUintFromMem(vform, i, addr1);
536 dst2.ReadUintFromMem(vform, i, addr2);
537 dst3.ReadUintFromMem(vform, i, addr3);
538 dst4.ReadUintFromMem(vform, i, addr4);
539 addr1 += 4 * esize;
540 addr2 += 4 * esize;
541 addr3 += 4 * esize;
542 addr4 += 4 * esize;
543 }
544 }
545
546
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr1)547 void Simulator::ld4(VectorFormat vform,
548 LogicVRegister dst1,
549 LogicVRegister dst2,
550 LogicVRegister dst3,
551 LogicVRegister dst4,
552 int index,
553 uint64_t addr1) {
554 dst1.ClearForWrite(vform);
555 dst2.ClearForWrite(vform);
556 dst3.ClearForWrite(vform);
557 dst4.ClearForWrite(vform);
558 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
559 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
560 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
561 dst1.ReadUintFromMem(vform, index, addr1);
562 dst2.ReadUintFromMem(vform, index, addr2);
563 dst3.ReadUintFromMem(vform, index, addr3);
564 dst4.ReadUintFromMem(vform, index, addr4);
565 }
566
567
ld4r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)568 void Simulator::ld4r(VectorFormat vform,
569 LogicVRegister dst1,
570 LogicVRegister dst2,
571 LogicVRegister dst3,
572 LogicVRegister dst4,
573 uint64_t addr) {
574 dst1.ClearForWrite(vform);
575 dst2.ClearForWrite(vform);
576 dst3.ClearForWrite(vform);
577 dst4.ClearForWrite(vform);
578 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
579 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
580 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
581 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
582 dst1.ReadUintFromMem(vform, i, addr);
583 dst2.ReadUintFromMem(vform, i, addr2);
584 dst3.ReadUintFromMem(vform, i, addr3);
585 dst4.ReadUintFromMem(vform, i, addr4);
586 }
587 }
588
589
st1(VectorFormat vform,LogicVRegister src,uint64_t addr)590 void Simulator::st1(VectorFormat vform,
591 LogicVRegister src,
592 uint64_t addr) {
593 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
594 src.WriteUintToMem(vform, i, addr);
595 addr += LaneSizeInBytesFromFormat(vform);
596 }
597 }
598
599
st1(VectorFormat vform,LogicVRegister src,int index,uint64_t addr)600 void Simulator::st1(VectorFormat vform,
601 LogicVRegister src,
602 int index,
603 uint64_t addr) {
604 src.WriteUintToMem(vform, index, addr);
605 }
606
607
st2(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,uint64_t addr)608 void Simulator::st2(VectorFormat vform,
609 LogicVRegister dst,
610 LogicVRegister dst2,
611 uint64_t addr) {
612 int esize = LaneSizeInBytesFromFormat(vform);
613 uint64_t addr2 = addr + esize;
614 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
615 dst.WriteUintToMem(vform, i, addr);
616 dst2.WriteUintToMem(vform, i, addr2);
617 addr += 2 * esize;
618 addr2 += 2 * esize;
619 }
620 }
621
622
st2(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,int index,uint64_t addr)623 void Simulator::st2(VectorFormat vform,
624 LogicVRegister dst,
625 LogicVRegister dst2,
626 int index,
627 uint64_t addr) {
628 int esize = LaneSizeInBytesFromFormat(vform);
629 dst.WriteUintToMem(vform, index, addr);
630 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
631 }
632
633
st3(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)634 void Simulator::st3(VectorFormat vform,
635 LogicVRegister dst,
636 LogicVRegister dst2,
637 LogicVRegister dst3,
638 uint64_t addr) {
639 int esize = LaneSizeInBytesFromFormat(vform);
640 uint64_t addr2 = addr + esize;
641 uint64_t addr3 = addr2 + esize;
642 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
643 dst.WriteUintToMem(vform, i, addr);
644 dst2.WriteUintToMem(vform, i, addr2);
645 dst3.WriteUintToMem(vform, i, addr3);
646 addr += 3 * esize;
647 addr2 += 3 * esize;
648 addr3 += 3 * esize;
649 }
650 }
651
652
st3(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr)653 void Simulator::st3(VectorFormat vform,
654 LogicVRegister dst,
655 LogicVRegister dst2,
656 LogicVRegister dst3,
657 int index,
658 uint64_t addr) {
659 int esize = LaneSizeInBytesFromFormat(vform);
660 dst.WriteUintToMem(vform, index, addr);
661 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
662 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
663 }
664
665
st4(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)666 void Simulator::st4(VectorFormat vform,
667 LogicVRegister dst,
668 LogicVRegister dst2,
669 LogicVRegister dst3,
670 LogicVRegister dst4,
671 uint64_t addr) {
672 int esize = LaneSizeInBytesFromFormat(vform);
673 uint64_t addr2 = addr + esize;
674 uint64_t addr3 = addr2 + esize;
675 uint64_t addr4 = addr3 + esize;
676 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
677 dst.WriteUintToMem(vform, i, addr);
678 dst2.WriteUintToMem(vform, i, addr2);
679 dst3.WriteUintToMem(vform, i, addr3);
680 dst4.WriteUintToMem(vform, i, addr4);
681 addr += 4 * esize;
682 addr2 += 4 * esize;
683 addr3 += 4 * esize;
684 addr4 += 4 * esize;
685 }
686 }
687
688
st4(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr)689 void Simulator::st4(VectorFormat vform,
690 LogicVRegister dst,
691 LogicVRegister dst2,
692 LogicVRegister dst3,
693 LogicVRegister dst4,
694 int index,
695 uint64_t addr) {
696 int esize = LaneSizeInBytesFromFormat(vform);
697 dst.WriteUintToMem(vform, index, addr);
698 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
699 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
700 dst4.WriteUintToMem(vform, index, addr + 3 * esize);
701 }
702
703
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)704 LogicVRegister Simulator::cmp(VectorFormat vform,
705 LogicVRegister dst,
706 const LogicVRegister& src1,
707 const LogicVRegister& src2,
708 Condition cond) {
709 dst.ClearForWrite(vform);
710 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
711 int64_t sa = src1.Int(vform, i);
712 int64_t sb = src2.Int(vform, i);
713 uint64_t ua = src1.Uint(vform, i);
714 uint64_t ub = src2.Uint(vform, i);
715 bool result = false;
716 switch (cond) {
717 case eq: result = (ua == ub); break;
718 case ge: result = (sa >= sb); break;
719 case gt: result = (sa > sb) ; break;
720 case hi: result = (ua > ub) ; break;
721 case hs: result = (ua >= ub); break;
722 case lt: result = (sa < sb) ; break;
723 case le: result = (sa <= sb); break;
724 default: VIXL_UNREACHABLE(); break;
725 }
726 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
727 }
728 return dst;
729 }
730
731
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int imm,Condition cond)732 LogicVRegister Simulator::cmp(VectorFormat vform,
733 LogicVRegister dst,
734 const LogicVRegister& src1,
735 int imm,
736 Condition cond) {
737 SimVRegister temp;
738 LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
739 return cmp(vform, dst, src1, imm_reg, cond);
740 }
741
742
cmptst(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)743 LogicVRegister Simulator::cmptst(VectorFormat vform,
744 LogicVRegister dst,
745 const LogicVRegister& src1,
746 const LogicVRegister& src2) {
747 dst.ClearForWrite(vform);
748 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
749 uint64_t ua = src1.Uint(vform, i);
750 uint64_t ub = src2.Uint(vform, i);
751 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
752 }
753 return dst;
754 }
755
756
add(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)757 LogicVRegister Simulator::add(VectorFormat vform,
758 LogicVRegister dst,
759 const LogicVRegister& src1,
760 const LogicVRegister& src2) {
761 dst.ClearForWrite(vform);
762 // TODO(all): consider assigning the result of LaneCountFromFormat to a local.
763 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
764 // Test for unsigned saturation.
765 uint64_t ua = src1.UintLeftJustified(vform, i);
766 uint64_t ub = src2.UintLeftJustified(vform, i);
767 uint64_t ur = ua + ub;
768 if (ur < ua) {
769 dst.SetUnsignedSat(i, true);
770 }
771
772 // Test for signed saturation.
773 int64_t sa = src1.IntLeftJustified(vform, i);
774 int64_t sb = src2.IntLeftJustified(vform, i);
775 int64_t sr = sa + sb;
776 // If the signs of the operands are the same, but different from the result,
777 // there was an overflow.
778 if (((sa >= 0) == (sb >= 0)) && ((sa >= 0) != (sr >= 0))) {
779 dst.SetSignedSat(i, sa >= 0);
780 }
781
782 dst.SetInt(vform, i, src1.Int(vform, i) + src2.Int(vform, i));
783 }
784 return dst;
785 }
786
787
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)788 LogicVRegister Simulator::addp(VectorFormat vform,
789 LogicVRegister dst,
790 const LogicVRegister& src1,
791 const LogicVRegister& src2) {
792 SimVRegister temp1, temp2;
793 uzp1(vform, temp1, src1, src2);
794 uzp2(vform, temp2, src1, src2);
795 add(vform, dst, temp1, temp2);
796 return dst;
797 }
798
799
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)800 LogicVRegister Simulator::mla(VectorFormat vform,
801 LogicVRegister dst,
802 const LogicVRegister& src1,
803 const LogicVRegister& src2) {
804 SimVRegister temp;
805 mul(vform, temp, src1, src2);
806 add(vform, dst, dst, temp);
807 return dst;
808 }
809
810
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)811 LogicVRegister Simulator::mls(VectorFormat vform,
812 LogicVRegister dst,
813 const LogicVRegister& src1,
814 const LogicVRegister& src2) {
815 SimVRegister temp;
816 mul(vform, temp, src1, src2);
817 sub(vform, dst, dst, temp);
818 return dst;
819 }
820
821
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)822 LogicVRegister Simulator::mul(VectorFormat vform,
823 LogicVRegister dst,
824 const LogicVRegister& src1,
825 const LogicVRegister& src2) {
826 dst.ClearForWrite(vform);
827 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
828 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
829 }
830 return dst;
831 }
832
833
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)834 LogicVRegister Simulator::mul(VectorFormat vform,
835 LogicVRegister dst,
836 const LogicVRegister& src1,
837 const LogicVRegister& src2,
838 int index) {
839 SimVRegister temp;
840 VectorFormat indexform = VectorFormatFillQ(vform);
841 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
842 }
843
844
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)845 LogicVRegister Simulator::mla(VectorFormat vform,
846 LogicVRegister dst,
847 const LogicVRegister& src1,
848 const LogicVRegister& src2,
849 int index) {
850 SimVRegister temp;
851 VectorFormat indexform = VectorFormatFillQ(vform);
852 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
853 }
854
855
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)856 LogicVRegister Simulator::mls(VectorFormat vform,
857 LogicVRegister dst,
858 const LogicVRegister& src1,
859 const LogicVRegister& src2,
860 int index) {
861 SimVRegister temp;
862 VectorFormat indexform = VectorFormatFillQ(vform);
863 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
864 }
865
866
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)867 LogicVRegister Simulator::smull(VectorFormat vform,
868 LogicVRegister dst,
869 const LogicVRegister& src1,
870 const LogicVRegister& src2,
871 int index) {
872 SimVRegister temp;
873 VectorFormat indexform =
874 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
875 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
876 }
877
878
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)879 LogicVRegister Simulator::smull2(VectorFormat vform,
880 LogicVRegister dst,
881 const LogicVRegister& src1,
882 const LogicVRegister& src2,
883 int index) {
884 SimVRegister temp;
885 VectorFormat indexform =
886 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
887 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
888 }
889
890
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)891 LogicVRegister Simulator::umull(VectorFormat vform,
892 LogicVRegister dst,
893 const LogicVRegister& src1,
894 const LogicVRegister& src2,
895 int index) {
896 SimVRegister temp;
897 VectorFormat indexform =
898 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
899 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
900 }
901
902
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)903 LogicVRegister Simulator::umull2(VectorFormat vform,
904 LogicVRegister dst,
905 const LogicVRegister& src1,
906 const LogicVRegister& src2,
907 int index) {
908 SimVRegister temp;
909 VectorFormat indexform =
910 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
911 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
912 }
913
914
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)915 LogicVRegister Simulator::smlal(VectorFormat vform,
916 LogicVRegister dst,
917 const LogicVRegister& src1,
918 const LogicVRegister& src2,
919 int index) {
920 SimVRegister temp;
921 VectorFormat indexform =
922 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
923 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
924 }
925
926
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)927 LogicVRegister Simulator::smlal2(VectorFormat vform,
928 LogicVRegister dst,
929 const LogicVRegister& src1,
930 const LogicVRegister& src2,
931 int index) {
932 SimVRegister temp;
933 VectorFormat indexform =
934 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
935 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
936 }
937
938
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)939 LogicVRegister Simulator::umlal(VectorFormat vform,
940 LogicVRegister dst,
941 const LogicVRegister& src1,
942 const LogicVRegister& src2,
943 int index) {
944 SimVRegister temp;
945 VectorFormat indexform =
946 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
947 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
948 }
949
950
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)951 LogicVRegister Simulator::umlal2(VectorFormat vform,
952 LogicVRegister dst,
953 const LogicVRegister& src1,
954 const LogicVRegister& src2,
955 int index) {
956 SimVRegister temp;
957 VectorFormat indexform =
958 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
959 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
960 }
961
962
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)963 LogicVRegister Simulator::smlsl(VectorFormat vform,
964 LogicVRegister dst,
965 const LogicVRegister& src1,
966 const LogicVRegister& src2,
967 int index) {
968 SimVRegister temp;
969 VectorFormat indexform =
970 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
971 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
972 }
973
974
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)975 LogicVRegister Simulator::smlsl2(VectorFormat vform,
976 LogicVRegister dst,
977 const LogicVRegister& src1,
978 const LogicVRegister& src2,
979 int index) {
980 SimVRegister temp;
981 VectorFormat indexform =
982 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
983 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
984 }
985
986
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)987 LogicVRegister Simulator::umlsl(VectorFormat vform,
988 LogicVRegister dst,
989 const LogicVRegister& src1,
990 const LogicVRegister& src2,
991 int index) {
992 SimVRegister temp;
993 VectorFormat indexform =
994 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
995 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
996 }
997
998
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)999 LogicVRegister Simulator::umlsl2(VectorFormat vform,
1000 LogicVRegister dst,
1001 const LogicVRegister& src1,
1002 const LogicVRegister& src2,
1003 int index) {
1004 SimVRegister temp;
1005 VectorFormat indexform =
1006 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1007 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1008 }
1009
1010
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1011 LogicVRegister Simulator::sqdmull(VectorFormat vform,
1012 LogicVRegister dst,
1013 const LogicVRegister& src1,
1014 const LogicVRegister& src2,
1015 int index) {
1016 SimVRegister temp;
1017 VectorFormat indexform =
1018 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1019 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
1020 }
1021
1022
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1023 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
1024 LogicVRegister dst,
1025 const LogicVRegister& src1,
1026 const LogicVRegister& src2,
1027 int index) {
1028 SimVRegister temp;
1029 VectorFormat indexform =
1030 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1031 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1032 }
1033
1034
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1035 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
1036 LogicVRegister dst,
1037 const LogicVRegister& src1,
1038 const LogicVRegister& src2,
1039 int index) {
1040 SimVRegister temp;
1041 VectorFormat indexform =
1042 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1043 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
1044 }
1045
1046
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1047 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
1048 LogicVRegister dst,
1049 const LogicVRegister& src1,
1050 const LogicVRegister& src2,
1051 int index) {
1052 SimVRegister temp;
1053 VectorFormat indexform =
1054 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1055 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1056 }
1057
1058
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1059 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
1060 LogicVRegister dst,
1061 const LogicVRegister& src1,
1062 const LogicVRegister& src2,
1063 int index) {
1064 SimVRegister temp;
1065 VectorFormat indexform =
1066 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1067 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
1068 }
1069
1070
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1071 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
1072 LogicVRegister dst,
1073 const LogicVRegister& src1,
1074 const LogicVRegister& src2,
1075 int index) {
1076 SimVRegister temp;
1077 VectorFormat indexform =
1078 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1079 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1080 }
1081
1082
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1083 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
1084 LogicVRegister dst,
1085 const LogicVRegister& src1,
1086 const LogicVRegister& src2,
1087 int index) {
1088 SimVRegister temp;
1089 VectorFormat indexform = VectorFormatFillQ(vform);
1090 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1091 }
1092
1093
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1094 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
1095 LogicVRegister dst,
1096 const LogicVRegister& src1,
1097 const LogicVRegister& src2,
1098 int index) {
1099 SimVRegister temp;
1100 VectorFormat indexform = VectorFormatFillQ(vform);
1101 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1102 }
1103
1104
PolynomialMult(uint8_t op1,uint8_t op2)1105 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {
1106 uint16_t result = 0;
1107 uint16_t extended_op2 = op2;
1108 for (int i = 0; i < 8; ++i) {
1109 if ((op1 >> i) & 1) {
1110 result = result ^ (extended_op2 << i);
1111 }
1112 }
1113 return result;
1114 }
1115
1116
pmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1117 LogicVRegister Simulator::pmul(VectorFormat vform,
1118 LogicVRegister dst,
1119 const LogicVRegister& src1,
1120 const LogicVRegister& src2) {
1121 dst.ClearForWrite(vform);
1122 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1123 dst.SetUint(vform, i,
1124 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
1125 }
1126 return dst;
1127 }
1128
1129
pmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1130 LogicVRegister Simulator::pmull(VectorFormat vform,
1131 LogicVRegister dst,
1132 const LogicVRegister& src1,
1133 const LogicVRegister& src2) {
1134 VectorFormat vform_src = VectorFormatHalfWidth(vform);
1135 dst.ClearForWrite(vform);
1136 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1137 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, i),
1138 src2.Uint(vform_src, i)));
1139 }
1140 return dst;
1141 }
1142
1143
pmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1144 LogicVRegister Simulator::pmull2(VectorFormat vform,
1145 LogicVRegister dst,
1146 const LogicVRegister& src1,
1147 const LogicVRegister& src2) {
1148 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
1149 dst.ClearForWrite(vform);
1150 int lane_count = LaneCountFromFormat(vform);
1151 for (int i = 0; i < lane_count; i++) {
1152 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, lane_count + i),
1153 src2.Uint(vform_src, lane_count + i)));
1154 }
1155 return dst;
1156 }
1157
1158
sub(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1159 LogicVRegister Simulator::sub(VectorFormat vform,
1160 LogicVRegister dst,
1161 const LogicVRegister& src1,
1162 const LogicVRegister& src2) {
1163 dst.ClearForWrite(vform);
1164 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1165 // Test for unsigned saturation.
1166 if (src2.Uint(vform, i) > src1.Uint(vform, i)) {
1167 dst.SetUnsignedSat(i, false);
1168 }
1169
1170 // Test for signed saturation.
1171 int64_t sa = src1.IntLeftJustified(vform, i);
1172 int64_t sb = src2.IntLeftJustified(vform, i);
1173 int64_t sr = sa - sb;
1174 // If the signs of the operands are different, and the sign of the first
1175 // operand doesn't match the result, there was an overflow.
1176 if (((sa >= 0) != (sb >= 0)) && ((sa >= 0) != (sr >= 0))) {
1177 dst.SetSignedSat(i, sr < 0);
1178 }
1179
1180 dst.SetInt(vform, i, src1.Int(vform, i) - src2.Int(vform, i));
1181 }
1182 return dst;
1183 }
1184
1185
and_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1186 LogicVRegister Simulator::and_(VectorFormat vform,
1187 LogicVRegister dst,
1188 const LogicVRegister& src1,
1189 const LogicVRegister& src2) {
1190 dst.ClearForWrite(vform);
1191 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1192 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1193 }
1194 return dst;
1195 }
1196
1197
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1198 LogicVRegister Simulator::orr(VectorFormat vform,
1199 LogicVRegister dst,
1200 const LogicVRegister& src1,
1201 const LogicVRegister& src2) {
1202 dst.ClearForWrite(vform);
1203 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1204 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1205 }
1206 return dst;
1207 }
1208
1209
orn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1210 LogicVRegister Simulator::orn(VectorFormat vform,
1211 LogicVRegister dst,
1212 const LogicVRegister& src1,
1213 const LogicVRegister& src2) {
1214 dst.ClearForWrite(vform);
1215 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1216 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1217 }
1218 return dst;
1219 }
1220
1221
eor(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1222 LogicVRegister Simulator::eor(VectorFormat vform,
1223 LogicVRegister dst,
1224 const LogicVRegister& src1,
1225 const LogicVRegister& src2) {
1226 dst.ClearForWrite(vform);
1227 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1228 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1229 }
1230 return dst;
1231 }
1232
1233
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1234 LogicVRegister Simulator::bic(VectorFormat vform,
1235 LogicVRegister dst,
1236 const LogicVRegister& src1,
1237 const LogicVRegister& src2) {
1238 dst.ClearForWrite(vform);
1239 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1240 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1241 }
1242 return dst;
1243 }
1244
1245
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)1246 LogicVRegister Simulator::bic(VectorFormat vform,
1247 LogicVRegister dst,
1248 const LogicVRegister& src,
1249 uint64_t imm) {
1250 uint64_t result[16];
1251 int laneCount = LaneCountFromFormat(vform);
1252 for (int i = 0; i < laneCount; ++i) {
1253 result[i] = src.Uint(vform, i) & ~imm;
1254 }
1255 dst.ClearForWrite(vform);
1256 for (int i = 0; i < laneCount; ++i) {
1257 dst.SetUint(vform, i, result[i]);
1258 }
1259 return dst;
1260 }
1261
1262
bif(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1263 LogicVRegister Simulator::bif(VectorFormat vform,
1264 LogicVRegister dst,
1265 const LogicVRegister& src1,
1266 const LogicVRegister& src2) {
1267 dst.ClearForWrite(vform);
1268 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1269 uint64_t operand1 = dst.Uint(vform, i);
1270 uint64_t operand2 = ~src2.Uint(vform, i);
1271 uint64_t operand3 = src1.Uint(vform, i);
1272 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1273 dst.SetUint(vform, i, result);
1274 }
1275 return dst;
1276 }
1277
1278
bit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1279 LogicVRegister Simulator::bit(VectorFormat vform,
1280 LogicVRegister dst,
1281 const LogicVRegister& src1,
1282 const LogicVRegister& src2) {
1283 dst.ClearForWrite(vform);
1284 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1285 uint64_t operand1 = dst.Uint(vform, i);
1286 uint64_t operand2 = src2.Uint(vform, i);
1287 uint64_t operand3 = src1.Uint(vform, i);
1288 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1289 dst.SetUint(vform, i, result);
1290 }
1291 return dst;
1292 }
1293
1294
bsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1295 LogicVRegister Simulator::bsl(VectorFormat vform,
1296 LogicVRegister dst,
1297 const LogicVRegister& src1,
1298 const LogicVRegister& src2) {
1299 dst.ClearForWrite(vform);
1300 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1301 uint64_t operand1 = src2.Uint(vform, i);
1302 uint64_t operand2 = dst.Uint(vform, i);
1303 uint64_t operand3 = src1.Uint(vform, i);
1304 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1305 dst.SetUint(vform, i, result);
1306 }
1307 return dst;
1308 }
1309
1310
sminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1311 LogicVRegister Simulator::sminmax(VectorFormat vform,
1312 LogicVRegister dst,
1313 const LogicVRegister& src1,
1314 const LogicVRegister& src2,
1315 bool max) {
1316 dst.ClearForWrite(vform);
1317 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1318 int64_t src1_val = src1.Int(vform, i);
1319 int64_t src2_val = src2.Int(vform, i);
1320 int64_t dst_val;
1321 if (max == true) {
1322 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1323 } else {
1324 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1325 }
1326 dst.SetInt(vform, i, dst_val);
1327 }
1328 return dst;
1329 }
1330
1331
smax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1332 LogicVRegister Simulator::smax(VectorFormat vform,
1333 LogicVRegister dst,
1334 const LogicVRegister& src1,
1335 const LogicVRegister& src2) {
1336 return sminmax(vform, dst, src1, src2, true);
1337 }
1338
1339
smin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1340 LogicVRegister Simulator::smin(VectorFormat vform,
1341 LogicVRegister dst,
1342 const LogicVRegister& src1,
1343 const LogicVRegister& src2) {
1344 return sminmax(vform, dst, src1, src2, false);
1345 }
1346
1347
sminmaxp(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,bool max)1348 LogicVRegister Simulator::sminmaxp(VectorFormat vform,
1349 LogicVRegister dst,
1350 int dst_index,
1351 const LogicVRegister& src,
1352 bool max) {
1353 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1354 int64_t src1_val = src.Int(vform, i);
1355 int64_t src2_val = src.Int(vform, i + 1);
1356 int64_t dst_val;
1357 if (max == true) {
1358 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1359 } else {
1360 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1361 }
1362 dst.SetInt(vform, dst_index + (i >> 1), dst_val);
1363 }
1364 return dst;
1365 }
1366
1367
smaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1368 LogicVRegister Simulator::smaxp(VectorFormat vform,
1369 LogicVRegister dst,
1370 const LogicVRegister& src1,
1371 const LogicVRegister& src2) {
1372 dst.ClearForWrite(vform);
1373 sminmaxp(vform, dst, 0, src1, true);
1374 sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true);
1375 return dst;
1376 }
1377
1378
sminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1379 LogicVRegister Simulator::sminp(VectorFormat vform,
1380 LogicVRegister dst,
1381 const LogicVRegister& src1,
1382 const LogicVRegister& src2) {
1383 dst.ClearForWrite(vform);
1384 sminmaxp(vform, dst, 0, src1, false);
1385 sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false);
1386 return dst;
1387 }
1388
1389
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1390 LogicVRegister Simulator::addp(VectorFormat vform,
1391 LogicVRegister dst,
1392 const LogicVRegister& src) {
1393 VIXL_ASSERT(vform == kFormatD);
1394
1395 int64_t dst_val = src.Int(kFormat2D, 0) + src.Int(kFormat2D, 1);
1396 dst.ClearForWrite(vform);
1397 dst.SetInt(vform, 0, dst_val);
1398 return dst;
1399 }
1400
1401
addv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1402 LogicVRegister Simulator::addv(VectorFormat vform,
1403 LogicVRegister dst,
1404 const LogicVRegister& src) {
1405 VectorFormat vform_dst
1406 = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1407
1408
1409 int64_t dst_val = 0;
1410 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1411 dst_val += src.Int(vform, i);
1412 }
1413
1414 dst.ClearForWrite(vform_dst);
1415 dst.SetInt(vform_dst, 0, dst_val);
1416 return dst;
1417 }
1418
1419
saddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1420 LogicVRegister Simulator::saddlv(VectorFormat vform,
1421 LogicVRegister dst,
1422 const LogicVRegister& src) {
1423 VectorFormat vform_dst
1424 = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1425
1426 int64_t dst_val = 0;
1427 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1428 dst_val += src.Int(vform, i);
1429 }
1430
1431 dst.ClearForWrite(vform_dst);
1432 dst.SetInt(vform_dst, 0, dst_val);
1433 return dst;
1434 }
1435
1436
uaddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1437 LogicVRegister Simulator::uaddlv(VectorFormat vform,
1438 LogicVRegister dst,
1439 const LogicVRegister& src) {
1440 VectorFormat vform_dst
1441 = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1442
1443 uint64_t dst_val = 0;
1444 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1445 dst_val += src.Uint(vform, i);
1446 }
1447
1448 dst.ClearForWrite(vform_dst);
1449 dst.SetUint(vform_dst, 0, dst_val);
1450 return dst;
1451 }
1452
1453
sminmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool max)1454 LogicVRegister Simulator::sminmaxv(VectorFormat vform,
1455 LogicVRegister dst,
1456 const LogicVRegister& src,
1457 bool max) {
1458 dst.ClearForWrite(vform);
1459 int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1460 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1461 dst.SetInt(vform, i, 0);
1462 int64_t src_val = src.Int(vform, i);
1463 if (max == true) {
1464 dst_val = (src_val > dst_val) ? src_val : dst_val;
1465 } else {
1466 dst_val = (src_val < dst_val) ? src_val : dst_val;
1467 }
1468 }
1469 dst.SetInt(vform, 0, dst_val);
1470 return dst;
1471 }
1472
1473
smaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1474 LogicVRegister Simulator::smaxv(VectorFormat vform,
1475 LogicVRegister dst,
1476 const LogicVRegister& src) {
1477 sminmaxv(vform, dst, src, true);
1478 return dst;
1479 }
1480
1481
sminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1482 LogicVRegister Simulator::sminv(VectorFormat vform,
1483 LogicVRegister dst,
1484 const LogicVRegister& src) {
1485 sminmaxv(vform, dst, src, false);
1486 return dst;
1487 }
1488
1489
uminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1490 LogicVRegister Simulator::uminmax(VectorFormat vform,
1491 LogicVRegister dst,
1492 const LogicVRegister& src1,
1493 const LogicVRegister& src2,
1494 bool max) {
1495 dst.ClearForWrite(vform);
1496 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1497 uint64_t src1_val = src1.Uint(vform, i);
1498 uint64_t src2_val = src2.Uint(vform, i);
1499 uint64_t dst_val;
1500 if (max == true) {
1501 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1502 } else {
1503 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1504 }
1505 dst.SetUint(vform, i, dst_val);
1506 }
1507 return dst;
1508 }
1509
1510
umax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1511 LogicVRegister Simulator::umax(VectorFormat vform,
1512 LogicVRegister dst,
1513 const LogicVRegister& src1,
1514 const LogicVRegister& src2) {
1515 return uminmax(vform, dst, src1, src2, true);
1516 }
1517
1518
umin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1519 LogicVRegister Simulator::umin(VectorFormat vform,
1520 LogicVRegister dst,
1521 const LogicVRegister& src1,
1522 const LogicVRegister& src2) {
1523 return uminmax(vform, dst, src1, src2, false);
1524 }
1525
1526
uminmaxp(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,bool max)1527 LogicVRegister Simulator::uminmaxp(VectorFormat vform,
1528 LogicVRegister dst,
1529 int dst_index,
1530 const LogicVRegister& src,
1531 bool max) {
1532 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1533 uint64_t src1_val = src.Uint(vform, i);
1534 uint64_t src2_val = src.Uint(vform, i + 1);
1535 uint64_t dst_val;
1536 if (max == true) {
1537 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1538 } else {
1539 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1540 }
1541 dst.SetUint(vform, dst_index + (i >> 1), dst_val);
1542 }
1543 return dst;
1544 }
1545
1546
umaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1547 LogicVRegister Simulator::umaxp(VectorFormat vform,
1548 LogicVRegister dst,
1549 const LogicVRegister& src1,
1550 const LogicVRegister& src2) {
1551 dst.ClearForWrite(vform);
1552 uminmaxp(vform, dst, 0, src1, true);
1553 uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true);
1554 return dst;
1555 }
1556
1557
uminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1558 LogicVRegister Simulator::uminp(VectorFormat vform,
1559 LogicVRegister dst,
1560 const LogicVRegister& src1,
1561 const LogicVRegister& src2) {
1562 dst.ClearForWrite(vform);
1563 uminmaxp(vform, dst, 0, src1, false);
1564 uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false);
1565 return dst;
1566 }
1567
1568
uminmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool max)1569 LogicVRegister Simulator::uminmaxv(VectorFormat vform,
1570 LogicVRegister dst,
1571 const LogicVRegister& src,
1572 bool max) {
1573 dst.ClearForWrite(vform);
1574 uint64_t dst_val = max ? 0 : UINT64_MAX;
1575 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1576 dst.SetUint(vform, i, 0);
1577 uint64_t src_val = src.Uint(vform, i);
1578 if (max == true) {
1579 dst_val = (src_val > dst_val) ? src_val : dst_val;
1580 } else {
1581 dst_val = (src_val < dst_val) ? src_val : dst_val;
1582 }
1583 }
1584 dst.SetUint(vform, 0, dst_val);
1585 return dst;
1586 }
1587
1588
umaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1589 LogicVRegister Simulator::umaxv(VectorFormat vform,
1590 LogicVRegister dst,
1591 const LogicVRegister& src) {
1592 uminmaxv(vform, dst, src, true);
1593 return dst;
1594 }
1595
1596
uminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1597 LogicVRegister Simulator::uminv(VectorFormat vform,
1598 LogicVRegister dst,
1599 const LogicVRegister& src) {
1600 uminmaxv(vform, dst, src, false);
1601 return dst;
1602 }
1603
1604
shl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1605 LogicVRegister Simulator::shl(VectorFormat vform,
1606 LogicVRegister dst,
1607 const LogicVRegister& src,
1608 int shift) {
1609 VIXL_ASSERT(shift >= 0);
1610 SimVRegister temp;
1611 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1612 return ushl(vform, dst, src, shiftreg);
1613 }
1614
1615
sshll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1616 LogicVRegister Simulator::sshll(VectorFormat vform,
1617 LogicVRegister dst,
1618 const LogicVRegister& src,
1619 int shift) {
1620 VIXL_ASSERT(shift >= 0);
1621 SimVRegister temp1, temp2;
1622 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1623 LogicVRegister extendedreg = sxtl(vform, temp2, src);
1624 return sshl(vform, dst, extendedreg, shiftreg);
1625 }
1626
1627
sshll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1628 LogicVRegister Simulator::sshll2(VectorFormat vform,
1629 LogicVRegister dst,
1630 const LogicVRegister& src,
1631 int shift) {
1632 VIXL_ASSERT(shift >= 0);
1633 SimVRegister temp1, temp2;
1634 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1635 LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1636 return sshl(vform, dst, extendedreg, shiftreg);
1637 }
1638
1639
shll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1640 LogicVRegister Simulator::shll(VectorFormat vform,
1641 LogicVRegister dst,
1642 const LogicVRegister& src) {
1643 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1644 return sshll(vform, dst, src, shift);
1645 }
1646
1647
shll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1648 LogicVRegister Simulator::shll2(VectorFormat vform,
1649 LogicVRegister dst,
1650 const LogicVRegister& src) {
1651 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1652 return sshll2(vform, dst, src, shift);
1653 }
1654
1655
ushll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1656 LogicVRegister Simulator::ushll(VectorFormat vform,
1657 LogicVRegister dst,
1658 const LogicVRegister& src,
1659 int shift) {
1660 VIXL_ASSERT(shift >= 0);
1661 SimVRegister temp1, temp2;
1662 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1663 LogicVRegister extendedreg = uxtl(vform, temp2, src);
1664 return ushl(vform, dst, extendedreg, shiftreg);
1665 }
1666
1667
ushll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1668 LogicVRegister Simulator::ushll2(VectorFormat vform,
1669 LogicVRegister dst,
1670 const LogicVRegister& src,
1671 int shift) {
1672 VIXL_ASSERT(shift >= 0);
1673 SimVRegister temp1, temp2;
1674 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1675 LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1676 return ushl(vform, dst, extendedreg, shiftreg);
1677 }
1678
1679
sli(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1680 LogicVRegister Simulator::sli(VectorFormat vform,
1681 LogicVRegister dst,
1682 const LogicVRegister& src,
1683 int shift) {
1684 dst.ClearForWrite(vform);
1685 int laneCount = LaneCountFromFormat(vform);
1686 for (int i = 0; i < laneCount; i++) {
1687 uint64_t src_lane = src.Uint(vform, i);
1688 uint64_t dst_lane = dst.Uint(vform, i);
1689 uint64_t shifted = src_lane << shift;
1690 uint64_t mask = MaxUintFromFormat(vform) << shift;
1691 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1692 }
1693 return dst;
1694 }
1695
1696
sqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1697 LogicVRegister Simulator::sqshl(VectorFormat vform,
1698 LogicVRegister dst,
1699 const LogicVRegister& src,
1700 int shift) {
1701 VIXL_ASSERT(shift >= 0);
1702 SimVRegister temp;
1703 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1704 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1705 }
1706
1707
uqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1708 LogicVRegister Simulator::uqshl(VectorFormat vform,
1709 LogicVRegister dst,
1710 const LogicVRegister& src,
1711 int shift) {
1712 VIXL_ASSERT(shift >= 0);
1713 SimVRegister temp;
1714 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1715 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1716 }
1717
1718
sqshlu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1719 LogicVRegister Simulator::sqshlu(VectorFormat vform,
1720 LogicVRegister dst,
1721 const LogicVRegister& src,
1722 int shift) {
1723 VIXL_ASSERT(shift >= 0);
1724 SimVRegister temp;
1725 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1726 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1727 }
1728
1729
sri(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1730 LogicVRegister Simulator::sri(VectorFormat vform,
1731 LogicVRegister dst,
1732 const LogicVRegister& src,
1733 int shift) {
1734 dst.ClearForWrite(vform);
1735 int laneCount = LaneCountFromFormat(vform);
1736 VIXL_ASSERT((shift > 0) &&
1737 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1738 for (int i = 0; i < laneCount; i++) {
1739 uint64_t src_lane = src.Uint(vform, i);
1740 uint64_t dst_lane = dst.Uint(vform, i);
1741 uint64_t shifted;
1742 uint64_t mask;
1743 if (shift == 64) {
1744 shifted = 0;
1745 mask = 0;
1746 } else {
1747 shifted = src_lane >> shift;
1748 mask = MaxUintFromFormat(vform) >> shift;
1749 }
1750 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1751 }
1752 return dst;
1753 }
1754
1755
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1756 LogicVRegister Simulator::ushr(VectorFormat vform,
1757 LogicVRegister dst,
1758 const LogicVRegister& src,
1759 int shift) {
1760 VIXL_ASSERT(shift >= 0);
1761 SimVRegister temp;
1762 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1763 return ushl(vform, dst, src, shiftreg);
1764 }
1765
1766
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1767 LogicVRegister Simulator::sshr(VectorFormat vform,
1768 LogicVRegister dst,
1769 const LogicVRegister& src,
1770 int shift) {
1771 VIXL_ASSERT(shift >= 0);
1772 SimVRegister temp;
1773 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1774 return sshl(vform, dst, src, shiftreg);
1775 }
1776
1777
ssra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1778 LogicVRegister Simulator::ssra(VectorFormat vform,
1779 LogicVRegister dst,
1780 const LogicVRegister& src,
1781 int shift) {
1782 SimVRegister temp;
1783 LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1784 return add(vform, dst, dst, shifted_reg);
1785 }
1786
1787
usra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1788 LogicVRegister Simulator::usra(VectorFormat vform,
1789 LogicVRegister dst,
1790 const LogicVRegister& src,
1791 int shift) {
1792 SimVRegister temp;
1793 LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1794 return add(vform, dst, dst, shifted_reg);
1795 }
1796
1797
srsra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1798 LogicVRegister Simulator::srsra(VectorFormat vform,
1799 LogicVRegister dst,
1800 const LogicVRegister& src,
1801 int shift) {
1802 SimVRegister temp;
1803 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1804 return add(vform, dst, dst, shifted_reg);
1805 }
1806
1807
ursra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1808 LogicVRegister Simulator::ursra(VectorFormat vform,
1809 LogicVRegister dst,
1810 const LogicVRegister& src,
1811 int shift) {
1812 SimVRegister temp;
1813 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1814 return add(vform, dst, dst, shifted_reg);
1815 }
1816
1817
cls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1818 LogicVRegister Simulator::cls(VectorFormat vform,
1819 LogicVRegister dst,
1820 const LogicVRegister& src) {
1821 uint64_t result[16];
1822 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1823 int laneCount = LaneCountFromFormat(vform);
1824 for (int i = 0; i < laneCount; i++) {
1825 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
1826 }
1827
1828 dst.ClearForWrite(vform);
1829 for (int i = 0; i < laneCount; ++i) {
1830 dst.SetUint(vform, i, result[i]);
1831 }
1832 return dst;
1833 }
1834
1835
clz(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1836 LogicVRegister Simulator::clz(VectorFormat vform,
1837 LogicVRegister dst,
1838 const LogicVRegister& src) {
1839 uint64_t result[16];
1840 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1841 int laneCount = LaneCountFromFormat(vform);
1842 for (int i = 0; i < laneCount; i++) {
1843 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
1844 }
1845
1846 dst.ClearForWrite(vform);
1847 for (int i = 0; i < laneCount; ++i) {
1848 dst.SetUint(vform, i, result[i]);
1849 }
1850 return dst;
1851 }
1852
1853
cnt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1854 LogicVRegister Simulator::cnt(VectorFormat vform,
1855 LogicVRegister dst,
1856 const LogicVRegister& src) {
1857 uint64_t result[16];
1858 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1859 int laneCount = LaneCountFromFormat(vform);
1860 for (int i = 0; i < laneCount; i++) {
1861 uint64_t value = src.Uint(vform, i);
1862 result[i] = 0;
1863 for (int j = 0; j < laneSizeInBits; j++) {
1864 result[i] += (value & 1);
1865 value >>= 1;
1866 }
1867 }
1868
1869 dst.ClearForWrite(vform);
1870 for (int i = 0; i < laneCount; ++i) {
1871 dst.SetUint(vform, i, result[i]);
1872 }
1873 return dst;
1874 }
1875
1876
sshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1877 LogicVRegister Simulator::sshl(VectorFormat vform,
1878 LogicVRegister dst,
1879 const LogicVRegister& src1,
1880 const LogicVRegister& src2) {
1881 dst.ClearForWrite(vform);
1882 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1883 int8_t shift_val = src2.Int(vform, i);
1884 int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1885
1886 // Set signed saturation state.
1887 if ((shift_val > CountLeadingSignBits(lj_src_val)) &&
1888 (lj_src_val != 0)) {
1889 dst.SetSignedSat(i, lj_src_val >= 0);
1890 }
1891
1892 // Set unsigned saturation state.
1893 if (lj_src_val < 0) {
1894 dst.SetUnsignedSat(i, false);
1895 } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
1896 (lj_src_val != 0)) {
1897 dst.SetUnsignedSat(i, true);
1898 }
1899
1900 int64_t src_val = src1.Int(vform, i);
1901 if (shift_val > 63) {
1902 dst.SetInt(vform, i, 0);
1903 } else if (shift_val < -63) {
1904 dst.SetRounding(i, src_val < 0);
1905 dst.SetInt(vform, i, (src_val < 0) ? -1 : 0);
1906 } else {
1907 if (shift_val < 0) {
1908 // Set rounding state. Rounding only needed on right shifts.
1909 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1910 dst.SetRounding(i, true);
1911 }
1912 src_val >>= -shift_val;
1913 } else {
1914 src_val <<= shift_val;
1915 }
1916 dst.SetInt(vform, i, src_val);
1917 }
1918 }
1919 return dst;
1920 }
1921
1922
ushl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1923 LogicVRegister Simulator::ushl(VectorFormat vform,
1924 LogicVRegister dst,
1925 const LogicVRegister& src1,
1926 const LogicVRegister& src2) {
1927 dst.ClearForWrite(vform);
1928 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1929 int8_t shift_val = src2.Int(vform, i);
1930 uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1931
1932 // Set saturation state.
1933 if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
1934 dst.SetUnsignedSat(i, true);
1935 }
1936
1937 uint64_t src_val = src1.Uint(vform, i);
1938 if ((shift_val > 63) || (shift_val < -64)) {
1939 dst.SetUint(vform, i, 0);
1940 } else {
1941 if (shift_val < 0) {
1942 // Set rounding state. Rounding only needed on right shifts.
1943 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1944 dst.SetRounding(i, true);
1945 }
1946
1947 if (shift_val == -64) {
1948 src_val = 0;
1949 } else {
1950 src_val >>= -shift_val;
1951 }
1952 } else {
1953 src_val <<= shift_val;
1954 }
1955 dst.SetUint(vform, i, src_val);
1956 }
1957 }
1958 return dst;
1959 }
1960
1961
neg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1962 LogicVRegister Simulator::neg(VectorFormat vform,
1963 LogicVRegister dst,
1964 const LogicVRegister& src) {
1965 dst.ClearForWrite(vform);
1966 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1967 // Test for signed saturation.
1968 int64_t sa = src.Int(vform, i);
1969 if (sa == MinIntFromFormat(vform)) {
1970 dst.SetSignedSat(i, true);
1971 }
1972 dst.SetInt(vform, i, -sa);
1973 }
1974 return dst;
1975 }
1976
1977
suqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1978 LogicVRegister Simulator::suqadd(VectorFormat vform,
1979 LogicVRegister dst,
1980 const LogicVRegister& src) {
1981 dst.ClearForWrite(vform);
1982 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1983 int64_t sa = dst.IntLeftJustified(vform, i);
1984 uint64_t ub = src.UintLeftJustified(vform, i);
1985 int64_t sr = sa + ub;
1986
1987 if (sr < sa) { // Test for signed positive saturation.
1988 dst.SetInt(vform, i, MaxIntFromFormat(vform));
1989 } else {
1990 dst.SetInt(vform, i, dst.Int(vform, i) + src.Int(vform, i));
1991 }
1992 }
1993 return dst;
1994 }
1995
1996
usqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1997 LogicVRegister Simulator::usqadd(VectorFormat vform,
1998 LogicVRegister dst,
1999 const LogicVRegister& src) {
2000 dst.ClearForWrite(vform);
2001 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2002 uint64_t ua = dst.UintLeftJustified(vform, i);
2003 int64_t sb = src.IntLeftJustified(vform, i);
2004 uint64_t ur = ua + sb;
2005
2006 if ((sb > 0) && (ur <= ua)) {
2007 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.
2008 } else if ((sb < 0) && (ur >= ua)) {
2009 dst.SetUint(vform, i, 0); // Negative saturation.
2010 } else {
2011 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
2012 }
2013 }
2014 return dst;
2015 }
2016
2017
abs(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2018 LogicVRegister Simulator::abs(VectorFormat vform,
2019 LogicVRegister dst,
2020 const LogicVRegister& src) {
2021 dst.ClearForWrite(vform);
2022 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2023 // Test for signed saturation.
2024 int64_t sa = src.Int(vform, i);
2025 if (sa == MinIntFromFormat(vform)) {
2026 dst.SetSignedSat(i, true);
2027 }
2028 if (sa < 0) {
2029 dst.SetInt(vform, i, -sa);
2030 } else {
2031 dst.SetInt(vform, i, sa);
2032 }
2033 }
2034 return dst;
2035 }
2036
2037
extractnarrow(VectorFormat dstform,LogicVRegister dst,bool dstIsSigned,const LogicVRegister & src,bool srcIsSigned)2038 LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
2039 LogicVRegister dst,
2040 bool dstIsSigned,
2041 const LogicVRegister& src,
2042 bool srcIsSigned) {
2043 bool upperhalf = false;
2044 VectorFormat srcform = kFormatUndefined;
2045 int64_t ssrc[8];
2046 uint64_t usrc[8];
2047
2048 switch (dstform) {
2049 case kFormat8B : upperhalf = false; srcform = kFormat8H; break;
2050 case kFormat16B: upperhalf = true; srcform = kFormat8H; break;
2051 case kFormat4H : upperhalf = false; srcform = kFormat4S; break;
2052 case kFormat8H : upperhalf = true; srcform = kFormat4S; break;
2053 case kFormat2S : upperhalf = false; srcform = kFormat2D; break;
2054 case kFormat4S : upperhalf = true; srcform = kFormat2D; break;
2055 case kFormatB : upperhalf = false; srcform = kFormatH; break;
2056 case kFormatH : upperhalf = false; srcform = kFormatS; break;
2057 case kFormatS : upperhalf = false; srcform = kFormatD; break;
2058 default:VIXL_UNIMPLEMENTED();
2059 }
2060
2061 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2062 ssrc[i] = src.Int(srcform, i);
2063 usrc[i] = src.Uint(srcform, i);
2064 }
2065
2066 int offset;
2067 if (upperhalf) {
2068 offset = LaneCountFromFormat(dstform) / 2;
2069 } else {
2070 offset = 0;
2071 dst.ClearForWrite(dstform);
2072 }
2073
2074 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2075 // Test for signed saturation
2076 if (ssrc[i] > MaxIntFromFormat(dstform)) {
2077 dst.SetSignedSat(offset + i, true);
2078 } else if (ssrc[i] < MinIntFromFormat(dstform)) {
2079 dst.SetSignedSat(offset + i, false);
2080 }
2081
2082 // Test for unsigned saturation
2083 if (srcIsSigned) {
2084 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
2085 dst.SetUnsignedSat(offset + i, true);
2086 } else if (ssrc[i] < 0) {
2087 dst.SetUnsignedSat(offset + i, false);
2088 }
2089 } else {
2090 if (usrc[i] > MaxUintFromFormat(dstform)) {
2091 dst.SetUnsignedSat(offset + i, true);
2092 }
2093 }
2094
2095 int64_t result;
2096 if (srcIsSigned) {
2097 result = ssrc[i] & MaxUintFromFormat(dstform);
2098 } else {
2099 result = usrc[i] & MaxUintFromFormat(dstform);
2100 }
2101
2102 if (dstIsSigned) {
2103 dst.SetInt(dstform, offset + i, result);
2104 } else {
2105 dst.SetUint(dstform, offset + i, result);
2106 }
2107 }
2108 return dst;
2109 }
2110
2111
xtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2112 LogicVRegister Simulator::xtn(VectorFormat vform,
2113 LogicVRegister dst,
2114 const LogicVRegister& src) {
2115 return extractnarrow(vform, dst, true, src, true);
2116 }
2117
2118
sqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2119 LogicVRegister Simulator::sqxtn(VectorFormat vform,
2120 LogicVRegister dst,
2121 const LogicVRegister& src) {
2122 return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
2123 }
2124
2125
sqxtun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2126 LogicVRegister Simulator::sqxtun(VectorFormat vform,
2127 LogicVRegister dst,
2128 const LogicVRegister& src) {
2129 return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
2130 }
2131
2132
uqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2133 LogicVRegister Simulator::uqxtn(VectorFormat vform,
2134 LogicVRegister dst,
2135 const LogicVRegister& src) {
2136 return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
2137 }
2138
2139
absdiff(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool issigned)2140 LogicVRegister Simulator::absdiff(VectorFormat vform,
2141 LogicVRegister dst,
2142 const LogicVRegister& src1,
2143 const LogicVRegister& src2,
2144 bool issigned) {
2145 dst.ClearForWrite(vform);
2146 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2147 if (issigned) {
2148 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
2149 sr = sr > 0 ? sr : -sr;
2150 dst.SetInt(vform, i, sr);
2151 } else {
2152 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
2153 sr = sr > 0 ? sr : -sr;
2154 dst.SetUint(vform, i, sr);
2155 }
2156 }
2157 return dst;
2158 }
2159
2160
saba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2161 LogicVRegister Simulator::saba(VectorFormat vform,
2162 LogicVRegister dst,
2163 const LogicVRegister& src1,
2164 const LogicVRegister& src2) {
2165 SimVRegister temp;
2166 dst.ClearForWrite(vform);
2167 absdiff(vform, temp, src1, src2, true);
2168 add(vform, dst, dst, temp);
2169 return dst;
2170 }
2171
2172
uaba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2173 LogicVRegister Simulator::uaba(VectorFormat vform,
2174 LogicVRegister dst,
2175 const LogicVRegister& src1,
2176 const LogicVRegister& src2) {
2177 SimVRegister temp;
2178 dst.ClearForWrite(vform);
2179 absdiff(vform, temp, src1, src2, false);
2180 add(vform, dst, dst, temp);
2181 return dst;
2182 }
2183
2184
not_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2185 LogicVRegister Simulator::not_(VectorFormat vform,
2186 LogicVRegister dst,
2187 const LogicVRegister& src) {
2188 dst.ClearForWrite(vform);
2189 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2190 dst.SetUint(vform, i, ~src.Uint(vform, i));
2191 }
2192 return dst;
2193 }
2194
2195
rbit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2196 LogicVRegister Simulator::rbit(VectorFormat vform,
2197 LogicVRegister dst,
2198 const LogicVRegister& src) {
2199 uint64_t result[16];
2200 int laneCount = LaneCountFromFormat(vform);
2201 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
2202 uint64_t reversed_value;
2203 uint64_t value;
2204 for (int i = 0; i < laneCount; i++) {
2205 value = src.Uint(vform, i);
2206 reversed_value = 0;
2207 for (int j = 0; j < laneSizeInBits; j++) {
2208 reversed_value = (reversed_value << 1) | (value & 1);
2209 value >>= 1;
2210 }
2211 result[i] = reversed_value;
2212 }
2213
2214 dst.ClearForWrite(vform);
2215 for (int i = 0; i < laneCount; ++i) {
2216 dst.SetUint(vform, i, result[i]);
2217 }
2218 return dst;
2219 }
2220
2221
rev(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int revSize)2222 LogicVRegister Simulator::rev(VectorFormat vform,
2223 LogicVRegister dst,
2224 const LogicVRegister& src,
2225 int revSize) {
2226 uint64_t result[16];
2227 int laneCount = LaneCountFromFormat(vform);
2228 int laneSize = LaneSizeInBytesFromFormat(vform);
2229 int lanesPerLoop = revSize / laneSize;
2230 for (int i = 0; i < laneCount; i += lanesPerLoop) {
2231 for (int j = 0; j < lanesPerLoop; j++) {
2232 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
2233 }
2234 }
2235 dst.ClearForWrite(vform);
2236 for (int i = 0; i < laneCount; ++i) {
2237 dst.SetUint(vform, i, result[i]);
2238 }
2239 return dst;
2240 }
2241
2242
rev16(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2243 LogicVRegister Simulator::rev16(VectorFormat vform,
2244 LogicVRegister dst,
2245 const LogicVRegister& src) {
2246 return rev(vform, dst, src, 2);
2247 }
2248
2249
rev32(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2250 LogicVRegister Simulator::rev32(VectorFormat vform,
2251 LogicVRegister dst,
2252 const LogicVRegister& src) {
2253 return rev(vform, dst, src, 4);
2254 }
2255
2256
rev64(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2257 LogicVRegister Simulator::rev64(VectorFormat vform,
2258 LogicVRegister dst,
2259 const LogicVRegister& src) {
2260 return rev(vform, dst, src, 8);
2261 }
2262
2263
addlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_signed,bool do_accumulate)2264 LogicVRegister Simulator::addlp(VectorFormat vform,
2265 LogicVRegister dst,
2266 const LogicVRegister& src,
2267 bool is_signed,
2268 bool do_accumulate) {
2269 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
2270
2271 int64_t sr[16];
2272 uint64_t ur[16];
2273
2274 int laneCount = LaneCountFromFormat(vform);
2275 for (int i = 0; i < laneCount; ++i) {
2276 if (is_signed) {
2277 sr[i] = src.Int(vformsrc, 2 * i) + src.Int(vformsrc, 2 * i + 1);
2278 } else {
2279 ur[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
2280 }
2281 }
2282
2283 dst.ClearForWrite(vform);
2284 for (int i = 0; i < laneCount; ++i) {
2285 if (do_accumulate) {
2286 if (is_signed) {
2287 dst.SetInt(vform, i, dst.Int(vform, i) + sr[i]);
2288 } else {
2289 dst.SetUint(vform, i, dst.Uint(vform, i) + ur[i]);
2290 }
2291 } else {
2292 if (is_signed) {
2293 dst.SetInt(vform, i, sr[i]);
2294 } else {
2295 dst.SetUint(vform, i, ur[i]);
2296 }
2297 }
2298 }
2299
2300 return dst;
2301 }
2302
2303
saddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2304 LogicVRegister Simulator::saddlp(VectorFormat vform,
2305 LogicVRegister dst,
2306 const LogicVRegister& src) {
2307 return addlp(vform, dst, src, true, false);
2308 }
2309
2310
uaddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2311 LogicVRegister Simulator::uaddlp(VectorFormat vform,
2312 LogicVRegister dst,
2313 const LogicVRegister& src) {
2314 return addlp(vform, dst, src, false, false);
2315 }
2316
2317
sadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2318 LogicVRegister Simulator::sadalp(VectorFormat vform,
2319 LogicVRegister dst,
2320 const LogicVRegister& src) {
2321 return addlp(vform, dst, src, true, true);
2322 }
2323
2324
uadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2325 LogicVRegister Simulator::uadalp(VectorFormat vform,
2326 LogicVRegister dst,
2327 const LogicVRegister& src) {
2328 return addlp(vform, dst, src, false, true);
2329 }
2330
2331
ext(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)2332 LogicVRegister Simulator::ext(VectorFormat vform,
2333 LogicVRegister dst,
2334 const LogicVRegister& src1,
2335 const LogicVRegister& src2,
2336 int index) {
2337 uint8_t result[16];
2338 int laneCount = LaneCountFromFormat(vform);
2339 for (int i = 0; i < laneCount - index; ++i) {
2340 result[i] = src1.Uint(vform, i + index);
2341 }
2342 for (int i = 0; i < index; ++i) {
2343 result[laneCount - index + i] = src2.Uint(vform, i);
2344 }
2345 dst.ClearForWrite(vform);
2346 for (int i = 0; i < laneCount; ++i) {
2347 dst.SetUint(vform, i, result[i]);
2348 }
2349 return dst;
2350 }
2351
2352
dup_element(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2353 LogicVRegister Simulator::dup_element(VectorFormat vform,
2354 LogicVRegister dst,
2355 const LogicVRegister& src,
2356 int src_index) {
2357 int laneCount = LaneCountFromFormat(vform);
2358 uint64_t value = src.Uint(vform, src_index);
2359 dst.ClearForWrite(vform);
2360 for (int i = 0; i < laneCount; ++i) {
2361 dst.SetUint(vform, i, value);
2362 }
2363 return dst;
2364 }
2365
2366
dup_immediate(VectorFormat vform,LogicVRegister dst,uint64_t imm)2367 LogicVRegister Simulator::dup_immediate(VectorFormat vform,
2368 LogicVRegister dst,
2369 uint64_t imm) {
2370 int laneCount = LaneCountFromFormat(vform);
2371 uint64_t value = imm & MaxUintFromFormat(vform);
2372 dst.ClearForWrite(vform);
2373 for (int i = 0; i < laneCount; ++i) {
2374 dst.SetUint(vform, i, value);
2375 }
2376 return dst;
2377 }
2378
2379
ins_element(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,int src_index)2380 LogicVRegister Simulator::ins_element(VectorFormat vform,
2381 LogicVRegister dst,
2382 int dst_index,
2383 const LogicVRegister& src,
2384 int src_index) {
2385 dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2386 return dst;
2387 }
2388
2389
ins_immediate(VectorFormat vform,LogicVRegister dst,int dst_index,uint64_t imm)2390 LogicVRegister Simulator::ins_immediate(VectorFormat vform,
2391 LogicVRegister dst,
2392 int dst_index,
2393 uint64_t imm) {
2394 uint64_t value = imm & MaxUintFromFormat(vform);
2395 dst.SetUint(vform, dst_index, value);
2396 return dst;
2397 }
2398
2399
movi(VectorFormat vform,LogicVRegister dst,uint64_t imm)2400 LogicVRegister Simulator::movi(VectorFormat vform,
2401 LogicVRegister dst,
2402 uint64_t imm) {
2403 int laneCount = LaneCountFromFormat(vform);
2404 dst.ClearForWrite(vform);
2405 for (int i = 0; i < laneCount; ++i) {
2406 dst.SetUint(vform, i, imm);
2407 }
2408 return dst;
2409 }
2410
2411
mvni(VectorFormat vform,LogicVRegister dst,uint64_t imm)2412 LogicVRegister Simulator::mvni(VectorFormat vform,
2413 LogicVRegister dst,
2414 uint64_t imm) {
2415 int laneCount = LaneCountFromFormat(vform);
2416 dst.ClearForWrite(vform);
2417 for (int i = 0; i < laneCount; ++i) {
2418 dst.SetUint(vform, i, ~imm);
2419 }
2420 return dst;
2421 }
2422
2423
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)2424 LogicVRegister Simulator::orr(VectorFormat vform,
2425 LogicVRegister dst,
2426 const LogicVRegister& src,
2427 uint64_t imm) {
2428 uint64_t result[16];
2429 int laneCount = LaneCountFromFormat(vform);
2430 for (int i = 0; i < laneCount; ++i) {
2431 result[i] = src.Uint(vform, i) | imm;
2432 }
2433 dst.ClearForWrite(vform);
2434 for (int i = 0; i < laneCount; ++i) {
2435 dst.SetUint(vform, i, result[i]);
2436 }
2437 return dst;
2438 }
2439
2440
uxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2441 LogicVRegister Simulator::uxtl(VectorFormat vform,
2442 LogicVRegister dst,
2443 const LogicVRegister& src) {
2444 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2445
2446 dst.ClearForWrite(vform);
2447 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2448 dst.SetUint(vform, i, src.Uint(vform_half, i));
2449 }
2450 return dst;
2451 }
2452
2453
sxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2454 LogicVRegister Simulator::sxtl(VectorFormat vform,
2455 LogicVRegister dst,
2456 const LogicVRegister& src) {
2457 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2458
2459 dst.ClearForWrite(vform);
2460 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2461 dst.SetInt(vform, i, src.Int(vform_half, i));
2462 }
2463 return dst;
2464 }
2465
2466
uxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2467 LogicVRegister Simulator::uxtl2(VectorFormat vform,
2468 LogicVRegister dst,
2469 const LogicVRegister& src) {
2470 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2471 int lane_count = LaneCountFromFormat(vform);
2472
2473 dst.ClearForWrite(vform);
2474 for (int i = 0; i < lane_count; i++) {
2475 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
2476 }
2477 return dst;
2478 }
2479
2480
sxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2481 LogicVRegister Simulator::sxtl2(VectorFormat vform,
2482 LogicVRegister dst,
2483 const LogicVRegister& src) {
2484 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2485 int lane_count = LaneCountFromFormat(vform);
2486
2487 dst.ClearForWrite(vform);
2488 for (int i = 0; i < lane_count; i++) {
2489 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
2490 }
2491 return dst;
2492 }
2493
2494
shrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2495 LogicVRegister Simulator::shrn(VectorFormat vform,
2496 LogicVRegister dst,
2497 const LogicVRegister& src,
2498 int shift) {
2499 SimVRegister temp;
2500 VectorFormat vform_src = VectorFormatDoubleWidth(vform);
2501 VectorFormat vform_dst = vform;
2502 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
2503 return extractnarrow(vform_dst, dst, false, shifted_src, false);
2504 }
2505
2506
shrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2507 LogicVRegister Simulator::shrn2(VectorFormat vform,
2508 LogicVRegister dst,
2509 const LogicVRegister& src,
2510 int shift) {
2511 SimVRegister temp;
2512 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2513 VectorFormat vformdst = vform;
2514 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
2515 return extractnarrow(vformdst, dst, false, shifted_src, false);
2516 }
2517
2518
rshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2519 LogicVRegister Simulator::rshrn(VectorFormat vform,
2520 LogicVRegister dst,
2521 const LogicVRegister& src,
2522 int shift) {
2523 SimVRegister temp;
2524 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2525 VectorFormat vformdst = vform;
2526 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2527 return extractnarrow(vformdst, dst, false, shifted_src, false);
2528 }
2529
2530
rshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2531 LogicVRegister Simulator::rshrn2(VectorFormat vform,
2532 LogicVRegister dst,
2533 const LogicVRegister& src,
2534 int shift) {
2535 SimVRegister temp;
2536 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2537 VectorFormat vformdst = vform;
2538 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2539 return extractnarrow(vformdst, dst, false, shifted_src, false);
2540 }
2541
2542
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)2543 LogicVRegister Simulator::tbl(VectorFormat vform,
2544 LogicVRegister dst,
2545 const LogicVRegister& tab,
2546 const LogicVRegister& ind) {
2547 movi(vform, dst, 0);
2548 return tbx(vform, dst, tab, ind);
2549 }
2550
2551
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)2552 LogicVRegister Simulator::tbl(VectorFormat vform,
2553 LogicVRegister dst,
2554 const LogicVRegister& tab,
2555 const LogicVRegister& tab2,
2556 const LogicVRegister& ind) {
2557 movi(vform, dst, 0);
2558 return tbx(vform, dst, tab, tab2, ind);
2559 }
2560
2561
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)2562 LogicVRegister Simulator::tbl(VectorFormat vform,
2563 LogicVRegister dst,
2564 const LogicVRegister& tab,
2565 const LogicVRegister& tab2,
2566 const LogicVRegister& tab3,
2567 const LogicVRegister& ind) {
2568 movi(vform, dst, 0);
2569 return tbx(vform, dst, tab, tab2, tab3, ind);
2570 }
2571
2572
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)2573 LogicVRegister Simulator::tbl(VectorFormat vform,
2574 LogicVRegister dst,
2575 const LogicVRegister& tab,
2576 const LogicVRegister& tab2,
2577 const LogicVRegister& tab3,
2578 const LogicVRegister& tab4,
2579 const LogicVRegister& ind) {
2580 movi(vform, dst, 0);
2581 return tbx(vform, dst, tab, tab2, tab3, tab4, ind);
2582 }
2583
2584
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)2585 LogicVRegister Simulator::tbx(VectorFormat vform,
2586 LogicVRegister dst,
2587 const LogicVRegister& tab,
2588 const LogicVRegister& ind) {
2589 dst.ClearForWrite(vform);
2590 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2591 uint64_t j = ind.Uint(vform, i);
2592 switch (j >> 4) {
2593 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
2594 }
2595 }
2596 return dst;
2597 }
2598
2599
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)2600 LogicVRegister Simulator::tbx(VectorFormat vform,
2601 LogicVRegister dst,
2602 const LogicVRegister& tab,
2603 const LogicVRegister& tab2,
2604 const LogicVRegister& ind) {
2605 dst.ClearForWrite(vform);
2606 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2607 uint64_t j = ind.Uint(vform, i);
2608 switch (j >> 4) {
2609 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
2610 case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
2611 }
2612 }
2613 return dst;
2614 }
2615
2616
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)2617 LogicVRegister Simulator::tbx(VectorFormat vform,
2618 LogicVRegister dst,
2619 const LogicVRegister& tab,
2620 const LogicVRegister& tab2,
2621 const LogicVRegister& tab3,
2622 const LogicVRegister& ind) {
2623 dst.ClearForWrite(vform);
2624 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2625 uint64_t j = ind.Uint(vform, i);
2626 switch (j >> 4) {
2627 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
2628 case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
2629 case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break;
2630 }
2631 }
2632 return dst;
2633 }
2634
2635
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)2636 LogicVRegister Simulator::tbx(VectorFormat vform,
2637 LogicVRegister dst,
2638 const LogicVRegister& tab,
2639 const LogicVRegister& tab2,
2640 const LogicVRegister& tab3,
2641 const LogicVRegister& tab4,
2642 const LogicVRegister& ind) {
2643 dst.ClearForWrite(vform);
2644 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2645 uint64_t j = ind.Uint(vform, i);
2646 switch (j >> 4) {
2647 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
2648 case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
2649 case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break;
2650 case 3: dst.SetUint(vform, i, tab4.Uint(kFormat16B, j & 15)); break;
2651 }
2652 }
2653 return dst;
2654 }
2655
2656
uqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2657 LogicVRegister Simulator::uqshrn(VectorFormat vform,
2658 LogicVRegister dst,
2659 const LogicVRegister& src,
2660 int shift) {
2661 return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
2662 }
2663
2664
uqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2665 LogicVRegister Simulator::uqshrn2(VectorFormat vform,
2666 LogicVRegister dst,
2667 const LogicVRegister& src,
2668 int shift) {
2669 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2670 }
2671
2672
uqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2673 LogicVRegister Simulator::uqrshrn(VectorFormat vform,
2674 LogicVRegister dst,
2675 const LogicVRegister& src,
2676 int shift) {
2677 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
2678 }
2679
2680
uqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2681 LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
2682 LogicVRegister dst,
2683 const LogicVRegister& src,
2684 int shift) {
2685 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2686 }
2687
2688
sqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2689 LogicVRegister Simulator::sqshrn(VectorFormat vform,
2690 LogicVRegister dst,
2691 const LogicVRegister& src,
2692 int shift) {
2693 SimVRegister temp;
2694 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2695 VectorFormat vformdst = vform;
2696 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2697 return sqxtn(vformdst, dst, shifted_src);
2698 }
2699
2700
sqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2701 LogicVRegister Simulator::sqshrn2(VectorFormat vform,
2702 LogicVRegister dst,
2703 const LogicVRegister& src,
2704 int shift) {
2705 SimVRegister temp;
2706 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2707 VectorFormat vformdst = vform;
2708 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2709 return sqxtn(vformdst, dst, shifted_src);
2710 }
2711
2712
sqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2713 LogicVRegister Simulator::sqrshrn(VectorFormat vform,
2714 LogicVRegister dst,
2715 const LogicVRegister& src,
2716 int shift) {
2717 SimVRegister temp;
2718 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2719 VectorFormat vformdst = vform;
2720 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2721 return sqxtn(vformdst, dst, shifted_src);
2722 }
2723
2724
sqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2725 LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
2726 LogicVRegister dst,
2727 const LogicVRegister& src,
2728 int shift) {
2729 SimVRegister temp;
2730 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2731 VectorFormat vformdst = vform;
2732 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2733 return sqxtn(vformdst, dst, shifted_src);
2734 }
2735
2736
sqshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2737 LogicVRegister Simulator::sqshrun(VectorFormat vform,
2738 LogicVRegister dst,
2739 const LogicVRegister& src,
2740 int shift) {
2741 SimVRegister temp;
2742 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2743 VectorFormat vformdst = vform;
2744 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2745 return sqxtun(vformdst, dst, shifted_src);
2746 }
2747
2748
sqshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2749 LogicVRegister Simulator::sqshrun2(VectorFormat vform,
2750 LogicVRegister dst,
2751 const LogicVRegister& src,
2752 int shift) {
2753 SimVRegister temp;
2754 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2755 VectorFormat vformdst = vform;
2756 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2757 return sqxtun(vformdst, dst, shifted_src);
2758 }
2759
2760
sqrshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2761 LogicVRegister Simulator::sqrshrun(VectorFormat vform,
2762 LogicVRegister dst,
2763 const LogicVRegister& src,
2764 int shift) {
2765 SimVRegister temp;
2766 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2767 VectorFormat vformdst = vform;
2768 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2769 return sqxtun(vformdst, dst, shifted_src);
2770 }
2771
2772
sqrshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2773 LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
2774 LogicVRegister dst,
2775 const LogicVRegister& src,
2776 int shift) {
2777 SimVRegister temp;
2778 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2779 VectorFormat vformdst = vform;
2780 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2781 return sqxtun(vformdst, dst, shifted_src);
2782 }
2783
2784
uaddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2785 LogicVRegister Simulator::uaddl(VectorFormat vform,
2786 LogicVRegister dst,
2787 const LogicVRegister& src1,
2788 const LogicVRegister& src2) {
2789 SimVRegister temp1, temp2;
2790 uxtl(vform, temp1, src1);
2791 uxtl(vform, temp2, src2);
2792 add(vform, dst, temp1, temp2);
2793 return dst;
2794 }
2795
2796
uaddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2797 LogicVRegister Simulator::uaddl2(VectorFormat vform,
2798 LogicVRegister dst,
2799 const LogicVRegister& src1,
2800 const LogicVRegister& src2) {
2801 SimVRegister temp1, temp2;
2802 uxtl2(vform, temp1, src1);
2803 uxtl2(vform, temp2, src2);
2804 add(vform, dst, temp1, temp2);
2805 return dst;
2806 }
2807
2808
uaddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2809 LogicVRegister Simulator::uaddw(VectorFormat vform,
2810 LogicVRegister dst,
2811 const LogicVRegister& src1,
2812 const LogicVRegister& src2) {
2813 SimVRegister temp;
2814 uxtl(vform, temp, src2);
2815 add(vform, dst, src1, temp);
2816 return dst;
2817 }
2818
2819
uaddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2820 LogicVRegister Simulator::uaddw2(VectorFormat vform,
2821 LogicVRegister dst,
2822 const LogicVRegister& src1,
2823 const LogicVRegister& src2) {
2824 SimVRegister temp;
2825 uxtl2(vform, temp, src2);
2826 add(vform, dst, src1, temp);
2827 return dst;
2828 }
2829
2830
saddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2831 LogicVRegister Simulator::saddl(VectorFormat vform,
2832 LogicVRegister dst,
2833 const LogicVRegister& src1,
2834 const LogicVRegister& src2) {
2835 SimVRegister temp1, temp2;
2836 sxtl(vform, temp1, src1);
2837 sxtl(vform, temp2, src2);
2838 add(vform, dst, temp1, temp2);
2839 return dst;
2840 }
2841
2842
saddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2843 LogicVRegister Simulator::saddl2(VectorFormat vform,
2844 LogicVRegister dst,
2845 const LogicVRegister& src1,
2846 const LogicVRegister& src2) {
2847 SimVRegister temp1, temp2;
2848 sxtl2(vform, temp1, src1);
2849 sxtl2(vform, temp2, src2);
2850 add(vform, dst, temp1, temp2);
2851 return dst;
2852 }
2853
2854
saddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2855 LogicVRegister Simulator::saddw(VectorFormat vform,
2856 LogicVRegister dst,
2857 const LogicVRegister& src1,
2858 const LogicVRegister& src2) {
2859 SimVRegister temp;
2860 sxtl(vform, temp, src2);
2861 add(vform, dst, src1, temp);
2862 return dst;
2863 }
2864
2865
saddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2866 LogicVRegister Simulator::saddw2(VectorFormat vform,
2867 LogicVRegister dst,
2868 const LogicVRegister& src1,
2869 const LogicVRegister& src2) {
2870 SimVRegister temp;
2871 sxtl2(vform, temp, src2);
2872 add(vform, dst, src1, temp);
2873 return dst;
2874 }
2875
2876
usubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2877 LogicVRegister Simulator::usubl(VectorFormat vform,
2878 LogicVRegister dst,
2879 const LogicVRegister& src1,
2880 const LogicVRegister& src2) {
2881 SimVRegister temp1, temp2;
2882 uxtl(vform, temp1, src1);
2883 uxtl(vform, temp2, src2);
2884 sub(vform, dst, temp1, temp2);
2885 return dst;
2886 }
2887
2888
usubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2889 LogicVRegister Simulator::usubl2(VectorFormat vform,
2890 LogicVRegister dst,
2891 const LogicVRegister& src1,
2892 const LogicVRegister& src2) {
2893 SimVRegister temp1, temp2;
2894 uxtl2(vform, temp1, src1);
2895 uxtl2(vform, temp2, src2);
2896 sub(vform, dst, temp1, temp2);
2897 return dst;
2898 }
2899
2900
usubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2901 LogicVRegister Simulator::usubw(VectorFormat vform,
2902 LogicVRegister dst,
2903 const LogicVRegister& src1,
2904 const LogicVRegister& src2) {
2905 SimVRegister temp;
2906 uxtl(vform, temp, src2);
2907 sub(vform, dst, src1, temp);
2908 return dst;
2909 }
2910
2911
usubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2912 LogicVRegister Simulator::usubw2(VectorFormat vform,
2913 LogicVRegister dst,
2914 const LogicVRegister& src1,
2915 const LogicVRegister& src2) {
2916 SimVRegister temp;
2917 uxtl2(vform, temp, src2);
2918 sub(vform, dst, src1, temp);
2919 return dst;
2920 }
2921
2922
ssubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2923 LogicVRegister Simulator::ssubl(VectorFormat vform,
2924 LogicVRegister dst,
2925 const LogicVRegister& src1,
2926 const LogicVRegister& src2) {
2927 SimVRegister temp1, temp2;
2928 sxtl(vform, temp1, src1);
2929 sxtl(vform, temp2, src2);
2930 sub(vform, dst, temp1, temp2);
2931 return dst;
2932 }
2933
2934
ssubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2935 LogicVRegister Simulator::ssubl2(VectorFormat vform,
2936 LogicVRegister dst,
2937 const LogicVRegister& src1,
2938 const LogicVRegister& src2) {
2939 SimVRegister temp1, temp2;
2940 sxtl2(vform, temp1, src1);
2941 sxtl2(vform, temp2, src2);
2942 sub(vform, dst, temp1, temp2);
2943 return dst;
2944 }
2945
2946
ssubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2947 LogicVRegister Simulator::ssubw(VectorFormat vform,
2948 LogicVRegister dst,
2949 const LogicVRegister& src1,
2950 const LogicVRegister& src2) {
2951 SimVRegister temp;
2952 sxtl(vform, temp, src2);
2953 sub(vform, dst, src1, temp);
2954 return dst;
2955 }
2956
2957
ssubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2958 LogicVRegister Simulator::ssubw2(VectorFormat vform,
2959 LogicVRegister dst,
2960 const LogicVRegister& src1,
2961 const LogicVRegister& src2) {
2962 SimVRegister temp;
2963 sxtl2(vform, temp, src2);
2964 sub(vform, dst, src1, temp);
2965 return dst;
2966 }
2967
2968
uabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2969 LogicVRegister Simulator::uabal(VectorFormat vform,
2970 LogicVRegister dst,
2971 const LogicVRegister& src1,
2972 const LogicVRegister& src2) {
2973 SimVRegister temp1, temp2;
2974 uxtl(vform, temp1, src1);
2975 uxtl(vform, temp2, src2);
2976 uaba(vform, dst, temp1, temp2);
2977 return dst;
2978 }
2979
2980
uabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2981 LogicVRegister Simulator::uabal2(VectorFormat vform,
2982 LogicVRegister dst,
2983 const LogicVRegister& src1,
2984 const LogicVRegister& src2) {
2985 SimVRegister temp1, temp2;
2986 uxtl2(vform, temp1, src1);
2987 uxtl2(vform, temp2, src2);
2988 uaba(vform, dst, temp1, temp2);
2989 return dst;
2990 }
2991
2992
sabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2993 LogicVRegister Simulator::sabal(VectorFormat vform,
2994 LogicVRegister dst,
2995 const LogicVRegister& src1,
2996 const LogicVRegister& src2) {
2997 SimVRegister temp1, temp2;
2998 sxtl(vform, temp1, src1);
2999 sxtl(vform, temp2, src2);
3000 saba(vform, dst, temp1, temp2);
3001 return dst;
3002 }
3003
3004
sabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3005 LogicVRegister Simulator::sabal2(VectorFormat vform,
3006 LogicVRegister dst,
3007 const LogicVRegister& src1,
3008 const LogicVRegister& src2) {
3009 SimVRegister temp1, temp2;
3010 sxtl2(vform, temp1, src1);
3011 sxtl2(vform, temp2, src2);
3012 saba(vform, dst, temp1, temp2);
3013 return dst;
3014 }
3015
3016
uabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3017 LogicVRegister Simulator::uabdl(VectorFormat vform,
3018 LogicVRegister dst,
3019 const LogicVRegister& src1,
3020 const LogicVRegister& src2) {
3021 SimVRegister temp1, temp2;
3022 uxtl(vform, temp1, src1);
3023 uxtl(vform, temp2, src2);
3024 absdiff(vform, dst, temp1, temp2, false);
3025 return dst;
3026 }
3027
3028
uabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3029 LogicVRegister Simulator::uabdl2(VectorFormat vform,
3030 LogicVRegister dst,
3031 const LogicVRegister& src1,
3032 const LogicVRegister& src2) {
3033 SimVRegister temp1, temp2;
3034 uxtl2(vform, temp1, src1);
3035 uxtl2(vform, temp2, src2);
3036 absdiff(vform, dst, temp1, temp2, false);
3037 return dst;
3038 }
3039
3040
sabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3041 LogicVRegister Simulator::sabdl(VectorFormat vform,
3042 LogicVRegister dst,
3043 const LogicVRegister& src1,
3044 const LogicVRegister& src2) {
3045 SimVRegister temp1, temp2;
3046 sxtl(vform, temp1, src1);
3047 sxtl(vform, temp2, src2);
3048 absdiff(vform, dst, temp1, temp2, true);
3049 return dst;
3050 }
3051
3052
sabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3053 LogicVRegister Simulator::sabdl2(VectorFormat vform,
3054 LogicVRegister dst,
3055 const LogicVRegister& src1,
3056 const LogicVRegister& src2) {
3057 SimVRegister temp1, temp2;
3058 sxtl2(vform, temp1, src1);
3059 sxtl2(vform, temp2, src2);
3060 absdiff(vform, dst, temp1, temp2, true);
3061 return dst;
3062 }
3063
3064
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3065 LogicVRegister Simulator::umull(VectorFormat vform,
3066 LogicVRegister dst,
3067 const LogicVRegister& src1,
3068 const LogicVRegister& src2) {
3069 SimVRegister temp1, temp2;
3070 uxtl(vform, temp1, src1);
3071 uxtl(vform, temp2, src2);
3072 mul(vform, dst, temp1, temp2);
3073 return dst;
3074 }
3075
3076
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3077 LogicVRegister Simulator::umull2(VectorFormat vform,
3078 LogicVRegister dst,
3079 const LogicVRegister& src1,
3080 const LogicVRegister& src2) {
3081 SimVRegister temp1, temp2;
3082 uxtl2(vform, temp1, src1);
3083 uxtl2(vform, temp2, src2);
3084 mul(vform, dst, temp1, temp2);
3085 return dst;
3086 }
3087
3088
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3089 LogicVRegister Simulator::smull(VectorFormat vform,
3090 LogicVRegister dst,
3091 const LogicVRegister& src1,
3092 const LogicVRegister& src2) {
3093 SimVRegister temp1, temp2;
3094 sxtl(vform, temp1, src1);
3095 sxtl(vform, temp2, src2);
3096 mul(vform, dst, temp1, temp2);
3097 return dst;
3098 }
3099
3100
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3101 LogicVRegister Simulator::smull2(VectorFormat vform,
3102 LogicVRegister dst,
3103 const LogicVRegister& src1,
3104 const LogicVRegister& src2) {
3105 SimVRegister temp1, temp2;
3106 sxtl2(vform, temp1, src1);
3107 sxtl2(vform, temp2, src2);
3108 mul(vform, dst, temp1, temp2);
3109 return dst;
3110 }
3111
3112
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3113 LogicVRegister Simulator::umlsl(VectorFormat vform,
3114 LogicVRegister dst,
3115 const LogicVRegister& src1,
3116 const LogicVRegister& src2) {
3117 SimVRegister temp1, temp2;
3118 uxtl(vform, temp1, src1);
3119 uxtl(vform, temp2, src2);
3120 mls(vform, dst, temp1, temp2);
3121 return dst;
3122 }
3123
3124
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3125 LogicVRegister Simulator::umlsl2(VectorFormat vform,
3126 LogicVRegister dst,
3127 const LogicVRegister& src1,
3128 const LogicVRegister& src2) {
3129 SimVRegister temp1, temp2;
3130 uxtl2(vform, temp1, src1);
3131 uxtl2(vform, temp2, src2);
3132 mls(vform, dst, temp1, temp2);
3133 return dst;
3134 }
3135
3136
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3137 LogicVRegister Simulator::smlsl(VectorFormat vform,
3138 LogicVRegister dst,
3139 const LogicVRegister& src1,
3140 const LogicVRegister& src2) {
3141 SimVRegister temp1, temp2;
3142 sxtl(vform, temp1, src1);
3143 sxtl(vform, temp2, src2);
3144 mls(vform, dst, temp1, temp2);
3145 return dst;
3146 }
3147
3148
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3149 LogicVRegister Simulator::smlsl2(VectorFormat vform,
3150 LogicVRegister dst,
3151 const LogicVRegister& src1,
3152 const LogicVRegister& src2) {
3153 SimVRegister temp1, temp2;
3154 sxtl2(vform, temp1, src1);
3155 sxtl2(vform, temp2, src2);
3156 mls(vform, dst, temp1, temp2);
3157 return dst;
3158 }
3159
3160
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3161 LogicVRegister Simulator::umlal(VectorFormat vform,
3162 LogicVRegister dst,
3163 const LogicVRegister& src1,
3164 const LogicVRegister& src2) {
3165 SimVRegister temp1, temp2;
3166 uxtl(vform, temp1, src1);
3167 uxtl(vform, temp2, src2);
3168 mla(vform, dst, temp1, temp2);
3169 return dst;
3170 }
3171
3172
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3173 LogicVRegister Simulator::umlal2(VectorFormat vform,
3174 LogicVRegister dst,
3175 const LogicVRegister& src1,
3176 const LogicVRegister& src2) {
3177 SimVRegister temp1, temp2;
3178 uxtl2(vform, temp1, src1);
3179 uxtl2(vform, temp2, src2);
3180 mla(vform, dst, temp1, temp2);
3181 return dst;
3182 }
3183
3184
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3185 LogicVRegister Simulator::smlal(VectorFormat vform,
3186 LogicVRegister dst,
3187 const LogicVRegister& src1,
3188 const LogicVRegister& src2) {
3189 SimVRegister temp1, temp2;
3190 sxtl(vform, temp1, src1);
3191 sxtl(vform, temp2, src2);
3192 mla(vform, dst, temp1, temp2);
3193 return dst;
3194 }
3195
3196
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3197 LogicVRegister Simulator::smlal2(VectorFormat vform,
3198 LogicVRegister dst,
3199 const LogicVRegister& src1,
3200 const LogicVRegister& src2) {
3201 SimVRegister temp1, temp2;
3202 sxtl2(vform, temp1, src1);
3203 sxtl2(vform, temp2, src2);
3204 mla(vform, dst, temp1, temp2);
3205 return dst;
3206 }
3207
3208
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3209 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
3210 LogicVRegister dst,
3211 const LogicVRegister& src1,
3212 const LogicVRegister& src2) {
3213 SimVRegister temp;
3214 LogicVRegister product = sqdmull(vform, temp, src1, src2);
3215 return add(vform, dst, dst, product).SignedSaturate(vform);
3216 }
3217
3218
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3219 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
3220 LogicVRegister dst,
3221 const LogicVRegister& src1,
3222 const LogicVRegister& src2) {
3223 SimVRegister temp;
3224 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3225 return add(vform, dst, dst, product).SignedSaturate(vform);
3226 }
3227
3228
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3229 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
3230 LogicVRegister dst,
3231 const LogicVRegister& src1,
3232 const LogicVRegister& src2) {
3233 SimVRegister temp;
3234 LogicVRegister product = sqdmull(vform, temp, src1, src2);
3235 return sub(vform, dst, dst, product).SignedSaturate(vform);
3236 }
3237
3238
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3239 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
3240 LogicVRegister dst,
3241 const LogicVRegister& src1,
3242 const LogicVRegister& src2) {
3243 SimVRegister temp;
3244 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3245 return sub(vform, dst, dst, product).SignedSaturate(vform);
3246 }
3247
3248
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3249 LogicVRegister Simulator::sqdmull(VectorFormat vform,
3250 LogicVRegister dst,
3251 const LogicVRegister& src1,
3252 const LogicVRegister& src2) {
3253 SimVRegister temp;
3254 LogicVRegister product = smull(vform, temp, src1, src2);
3255 return add(vform, dst, product, product).SignedSaturate(vform);
3256 }
3257
3258
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3259 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
3260 LogicVRegister dst,
3261 const LogicVRegister& src1,
3262 const LogicVRegister& src2) {
3263 SimVRegister temp;
3264 LogicVRegister product = smull2(vform, temp, src1, src2);
3265 return add(vform, dst, product, product).SignedSaturate(vform);
3266 }
3267
3268
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)3269 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
3270 LogicVRegister dst,
3271 const LogicVRegister& src1,
3272 const LogicVRegister& src2,
3273 bool round) {
3274 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
3275 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
3276 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
3277
3278 int esize = LaneSizeInBitsFromFormat(vform);
3279 int round_const = round ? (1 << (esize - 2)) : 0;
3280 int64_t product;
3281
3282 dst.ClearForWrite(vform);
3283 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3284 product = src1.Int(vform, i) * src2.Int(vform, i);
3285 product += round_const;
3286 product = product >> (esize - 1);
3287
3288 if (product > MaxIntFromFormat(vform)) {
3289 product = MaxIntFromFormat(vform);
3290 } else if (product < MinIntFromFormat(vform)) {
3291 product = MinIntFromFormat(vform);
3292 }
3293 dst.SetInt(vform, i, product);
3294 }
3295 return dst;
3296 }
3297
3298
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3299 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
3300 LogicVRegister dst,
3301 const LogicVRegister& src1,
3302 const LogicVRegister& src2) {
3303 return sqrdmulh(vform, dst, src1, src2, false);
3304 }
3305
3306
addhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3307 LogicVRegister Simulator::addhn(VectorFormat vform,
3308 LogicVRegister dst,
3309 const LogicVRegister& src1,
3310 const LogicVRegister& src2) {
3311 SimVRegister temp;
3312 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
3313 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3314 return dst;
3315 }
3316
3317
addhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3318 LogicVRegister Simulator::addhn2(VectorFormat vform,
3319 LogicVRegister dst,
3320 const LogicVRegister& src1,
3321 const LogicVRegister& src2) {
3322 SimVRegister temp;
3323 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3324 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3325 return dst;
3326 }
3327
3328
raddhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3329 LogicVRegister Simulator::raddhn(VectorFormat vform,
3330 LogicVRegister dst,
3331 const LogicVRegister& src1,
3332 const LogicVRegister& src2) {
3333 SimVRegister temp;
3334 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
3335 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3336 return dst;
3337 }
3338
3339
raddhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3340 LogicVRegister Simulator::raddhn2(VectorFormat vform,
3341 LogicVRegister dst,
3342 const LogicVRegister& src1,
3343 const LogicVRegister& src2) {
3344 SimVRegister temp;
3345 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3346 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3347 return dst;
3348 }
3349
3350
subhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3351 LogicVRegister Simulator::subhn(VectorFormat vform,
3352 LogicVRegister dst,
3353 const LogicVRegister& src1,
3354 const LogicVRegister& src2) {
3355 SimVRegister temp;
3356 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
3357 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3358 return dst;
3359 }
3360
3361
subhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3362 LogicVRegister Simulator::subhn2(VectorFormat vform,
3363 LogicVRegister dst,
3364 const LogicVRegister& src1,
3365 const LogicVRegister& src2) {
3366 SimVRegister temp;
3367 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3368 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3369 return dst;
3370 }
3371
3372
rsubhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3373 LogicVRegister Simulator::rsubhn(VectorFormat vform,
3374 LogicVRegister dst,
3375 const LogicVRegister& src1,
3376 const LogicVRegister& src2) {
3377 SimVRegister temp;
3378 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
3379 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3380 return dst;
3381 }
3382
3383
rsubhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3384 LogicVRegister Simulator::rsubhn2(VectorFormat vform,
3385 LogicVRegister dst,
3386 const LogicVRegister& src1,
3387 const LogicVRegister& src2) {
3388 SimVRegister temp;
3389 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3390 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3391 return dst;
3392 }
3393
3394
trn1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3395 LogicVRegister Simulator::trn1(VectorFormat vform,
3396 LogicVRegister dst,
3397 const LogicVRegister& src1,
3398 const LogicVRegister& src2) {
3399 uint64_t result[16];
3400 int laneCount = LaneCountFromFormat(vform);
3401 int pairs = laneCount / 2;
3402 for (int i = 0; i < pairs; ++i) {
3403 result[2 * i] = src1.Uint(vform, 2 * i);
3404 result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
3405 }
3406
3407 dst.ClearForWrite(vform);
3408 for (int i = 0; i < laneCount; ++i) {
3409 dst.SetUint(vform, i, result[i]);
3410 }
3411 return dst;
3412 }
3413
3414
trn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3415 LogicVRegister Simulator::trn2(VectorFormat vform,
3416 LogicVRegister dst,
3417 const LogicVRegister& src1,
3418 const LogicVRegister& src2) {
3419 uint64_t result[16];
3420 int laneCount = LaneCountFromFormat(vform);
3421 int pairs = laneCount / 2;
3422 for (int i = 0; i < pairs; ++i) {
3423 result[2 * i] = src1.Uint(vform, (2 * i) + 1);
3424 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
3425 }
3426
3427 dst.ClearForWrite(vform);
3428 for (int i = 0; i < laneCount; ++i) {
3429 dst.SetUint(vform, i, result[i]);
3430 }
3431 return dst;
3432 }
3433
3434
zip1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3435 LogicVRegister Simulator::zip1(VectorFormat vform,
3436 LogicVRegister dst,
3437 const LogicVRegister& src1,
3438 const LogicVRegister& src2) {
3439 uint64_t result[16];
3440 int laneCount = LaneCountFromFormat(vform);
3441 int pairs = laneCount / 2;
3442 for (int i = 0; i < pairs; ++i) {
3443 result[2 * i] = src1.Uint(vform, i);
3444 result[(2 * i) + 1] = src2.Uint(vform, i);
3445 }
3446
3447 dst.ClearForWrite(vform);
3448 for (int i = 0; i < laneCount; ++i) {
3449 dst.SetUint(vform, i, result[i]);
3450 }
3451 return dst;
3452 }
3453
3454
zip2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3455 LogicVRegister Simulator::zip2(VectorFormat vform,
3456 LogicVRegister dst,
3457 const LogicVRegister& src1,
3458 const LogicVRegister& src2) {
3459 uint64_t result[16];
3460 int laneCount = LaneCountFromFormat(vform);
3461 int pairs = laneCount / 2;
3462 for (int i = 0; i < pairs; ++i) {
3463 result[2 * i] = src1.Uint(vform, pairs + i);
3464 result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
3465 }
3466
3467 dst.ClearForWrite(vform);
3468 for (int i = 0; i < laneCount; ++i) {
3469 dst.SetUint(vform, i, result[i]);
3470 }
3471 return dst;
3472 }
3473
3474
uzp1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3475 LogicVRegister Simulator::uzp1(VectorFormat vform,
3476 LogicVRegister dst,
3477 const LogicVRegister& src1,
3478 const LogicVRegister& src2) {
3479 uint64_t result[32];
3480 int laneCount = LaneCountFromFormat(vform);
3481 for (int i = 0; i < laneCount; ++i) {
3482 result[i] = src1.Uint(vform, i);
3483 result[laneCount + i] = src2.Uint(vform, i);
3484 }
3485
3486 dst.ClearForWrite(vform);
3487 for (int i = 0; i < laneCount; ++i) {
3488 dst.SetUint(vform, i, result[2 * i]);
3489 }
3490 return dst;
3491 }
3492
3493
uzp2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3494 LogicVRegister Simulator::uzp2(VectorFormat vform,
3495 LogicVRegister dst,
3496 const LogicVRegister& src1,
3497 const LogicVRegister& src2) {
3498 uint64_t result[32];
3499 int laneCount = LaneCountFromFormat(vform);
3500 for (int i = 0; i < laneCount; ++i) {
3501 result[i] = src1.Uint(vform, i);
3502 result[laneCount + i] = src2.Uint(vform, i);
3503 }
3504
3505 dst.ClearForWrite(vform);
3506 for (int i = 0; i < laneCount; ++i) {
3507 dst.SetUint(vform, i, result[ (2 * i) + 1]);
3508 }
3509 return dst;
3510 }
3511
3512
3513 template <typename T>
FPAdd(T op1,T op2)3514 T Simulator::FPAdd(T op1, T op2) {
3515 T result = FPProcessNaNs(op1, op2);
3516 if (std::isnan(result)) return result;
3517
3518 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
3519 // inf + -inf returns the default NaN.
3520 FPProcessException();
3521 return FPDefaultNaN<T>();
3522 } else {
3523 // Other cases should be handled by standard arithmetic.
3524 return op1 + op2;
3525 }
3526 }
3527
3528
3529 template <typename T>
FPSub(T op1,T op2)3530 T Simulator::FPSub(T op1, T op2) {
3531 // NaNs should be handled elsewhere.
3532 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3533
3534 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
3535 // inf - inf returns the default NaN.
3536 FPProcessException();
3537 return FPDefaultNaN<T>();
3538 } else {
3539 // Other cases should be handled by standard arithmetic.
3540 return op1 - op2;
3541 }
3542 }
3543
3544
3545 template <typename T>
FPMul(T op1,T op2)3546 T Simulator::FPMul(T op1, T op2) {
3547 // NaNs should be handled elsewhere.
3548 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3549
3550 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
3551 // inf * 0.0 returns the default NaN.
3552 FPProcessException();
3553 return FPDefaultNaN<T>();
3554 } else {
3555 // Other cases should be handled by standard arithmetic.
3556 return op1 * op2;
3557 }
3558 }
3559
3560
3561 template<typename T>
FPMulx(T op1,T op2)3562 T Simulator::FPMulx(T op1, T op2) {
3563 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
3564 // inf * 0.0 returns +/-2.0.
3565 T two = 2.0;
3566 return copysign(1.0, op1) * copysign(1.0, op2) * two;
3567 }
3568 return FPMul(op1, op2);
3569 }
3570
3571
// Floating-point multiply-accumulate helper taking an addend `a` and two
// multiplicands `op1` and `op2` (presumably computing a + (op1 * op2) with a
// single rounding - confirm against FusedMultiplyAdd()).
//
// Order of handling:
//   1. NaN operands are processed with FPProcessNaNs3().
//   2. Operand combinations that would generate a NaN (inf * 0.0, or an
//      infinite `a` combined with an infinite product of the opposite sign)
//      signal FPProcessException() and return the default NaN. When step 1
//      already produced a NaN, this only applies if `a` is a quiet NaN.
//   3. Exact and rounded zero results get their sign fixed up explicitly to
//      work around broken fma implementations.
template<typename T>
T Simulator::FPMulAdd(T a, T op1, T op2) {
  // A NaN here is taken below to mean that an input NaN is being propagated.
  T result = FPProcessNaNs3(a, op1, op2);

  // +1.0 or -1.0 carrying the sign of the addend and of the product.
  T sign_a = copysign(1.0, a);
  T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
  bool isinf_prod = std::isinf(op1) || std::isinf(op2);
  bool operation_generates_nan =
      (std::isinf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
      (std::isinf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
      (std::isinf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf

  if (std::isnan(result)) {
    // Generated NaNs override quiet NaNs propagated from a.
    if (operation_generates_nan && IsQuietNaN(a)) {
      FPProcessException();
      return FPDefaultNaN<T>();
    } else {
      return result;
    }
  }

  // If the operation would produce a NaN, return the default NaN.
  if (operation_generates_nan) {
    FPProcessException();
    return FPDefaultNaN<T>();
  }

  // Work around broken fma implementations for exact zero results: The sign of
  // exact 0.0 results is positive unless both a and op1 * op2 are negative.
  if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
    return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
  }

  // Note the argument order: the multiplicands come first, the addend last.
  result = FusedMultiplyAdd(op1, op2, a);
  VIXL_ASSERT(!std::isnan(result));

  // Work around broken fma implementations for rounded zero results: If a is
  // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
  if ((a == 0.0) && (result == 0.0)) {
    return copysign(0.0, sign_prod);
  }

  return result;
}
3617
3618
3619 template <typename T>
FPDiv(T op1,T op2)3620 T Simulator::FPDiv(T op1, T op2) {
3621 // NaNs should be handled elsewhere.
3622 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3623
3624 if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
3625 // inf / inf and 0.0 / 0.0 return the default NaN.
3626 FPProcessException();
3627 return FPDefaultNaN<T>();
3628 } else {
3629 if (op2 == 0.0) FPProcessException();
3630
3631 // Other cases should be handled by standard arithmetic.
3632 return op1 / op2;
3633 }
3634 }
3635
3636
3637 template <typename T>
FPSqrt(T op)3638 T Simulator::FPSqrt(T op) {
3639 if (std::isnan(op)) {
3640 return FPProcessNaN(op);
3641 } else if (op < 0.0) {
3642 FPProcessException();
3643 return FPDefaultNaN<T>();
3644 } else {
3645 return sqrt(op);
3646 }
3647 }
3648
3649
3650 template <typename T>
FPMax(T a,T b)3651 T Simulator::FPMax(T a, T b) {
3652 T result = FPProcessNaNs(a, b);
3653 if (std::isnan(result)) return result;
3654
3655 if ((a == 0.0) && (b == 0.0) &&
3656 (copysign(1.0, a) != copysign(1.0, b))) {
3657 // a and b are zero, and the sign differs: return +0.0.
3658 return 0.0;
3659 } else {
3660 return (a > b) ? a : b;
3661 }
3662 }
3663
3664
3665 template <typename T>
FPMaxNM(T a,T b)3666 T Simulator::FPMaxNM(T a, T b) {
3667 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3668 a = kFP64NegativeInfinity;
3669 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3670 b = kFP64NegativeInfinity;
3671 }
3672
3673 T result = FPProcessNaNs(a, b);
3674 return std::isnan(result) ? result : FPMax(a, b);
3675 }
3676
3677
3678 template <typename T>
FPMin(T a,T b)3679 T Simulator::FPMin(T a, T b) {
3680 T result = FPProcessNaNs(a, b);
3681 if (std::isnan(result)) return result;
3682
3683 if ((a == 0.0) && (b == 0.0) &&
3684 (copysign(1.0, a) != copysign(1.0, b))) {
3685 // a and b are zero, and the sign differs: return -0.0.
3686 return -0.0;
3687 } else {
3688 return (a < b) ? a : b;
3689 }
3690 }
3691
3692
3693 template <typename T>
FPMinNM(T a,T b)3694 T Simulator::FPMinNM(T a, T b) {
3695 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3696 a = kFP64PositiveInfinity;
3697 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3698 b = kFP64PositiveInfinity;
3699 }
3700
3701 T result = FPProcessNaNs(a, b);
3702 return std::isnan(result) ? result : FPMin(a, b);
3703 }
3704
3705
3706 template <typename T>
FPRecipStepFused(T op1,T op2)3707 T Simulator::FPRecipStepFused(T op1, T op2) {
3708 const T two = 2.0;
3709 if ((std::isinf(op1) && (op2 == 0.0))
3710 || ((op1 == 0.0) && (std::isinf(op2)))) {
3711 return two;
3712 } else if (std::isinf(op1) || std::isinf(op2)) {
3713 // Return +inf if signs match, otherwise -inf.
3714 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3715 : kFP64NegativeInfinity;
3716 } else {
3717 return FusedMultiplyAdd(op1, op2, two);
3718 }
3719 }
3720
3721
3722 template <typename T>
FPRSqrtStepFused(T op1,T op2)3723 T Simulator::FPRSqrtStepFused(T op1, T op2) {
3724 const T one_point_five = 1.5;
3725 const T two = 2.0;
3726
3727 if ((std::isinf(op1) && (op2 == 0.0))
3728 || ((op1 == 0.0) && (std::isinf(op2)))) {
3729 return one_point_five;
3730 } else if (std::isinf(op1) || std::isinf(op2)) {
3731 // Return +inf if signs match, otherwise -inf.
3732 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3733 : kFP64NegativeInfinity;
3734 } else {
3735 // The multiply-add-halve operation must be fully fused, so avoid interim
3736 // rounding by checking which operand can be losslessly divided by two
3737 // before doing the multiply-add.
3738 if (std::isnormal(op1 / two)) {
3739 return FusedMultiplyAdd(op1 / two, op2, one_point_five);
3740 } else if (std::isnormal(op2 / two)) {
3741 return FusedMultiplyAdd(op1, op2 / two, one_point_five);
3742 } else {
3743 // Neither operand is normal after halving: the result is dominated by
3744 // the addition term, so just return that.
3745 return one_point_five;
3746 }
3747 }
3748 }
3749
3750
FPRoundInt(double value,FPRounding round_mode)3751 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
3752 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3753 (value == kFP64NegativeInfinity)) {
3754 return value;
3755 } else if (std::isnan(value)) {
3756 return FPProcessNaN(value);
3757 }
3758
3759 double int_result = std::floor(value);
3760 double error = value - int_result;
3761 switch (round_mode) {
3762 case FPTieAway: {
3763 // Take care of correctly handling the range ]-0.5, -0.0], which must
3764 // yield -0.0.
3765 if ((-0.5 < value) && (value < 0.0)) {
3766 int_result = -0.0;
3767
3768 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
3769 // If the error is greater than 0.5, or is equal to 0.5 and the integer
3770 // result is positive, round up.
3771 int_result++;
3772 }
3773 break;
3774 }
3775 case FPTieEven: {
3776 // Take care of correctly handling the range [-0.5, -0.0], which must
3777 // yield -0.0.
3778 if ((-0.5 <= value) && (value < 0.0)) {
3779 int_result = -0.0;
3780
3781 // If the error is greater than 0.5, or is equal to 0.5 and the integer
3782 // result is odd, round up.
3783 } else if ((error > 0.5) ||
3784 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
3785 int_result++;
3786 }
3787 break;
3788 }
3789 case FPZero: {
3790 // If value>0 then we take floor(value)
3791 // otherwise, ceil(value).
3792 if (value < 0) {
3793 int_result = ceil(value);
3794 }
3795 break;
3796 }
3797 case FPNegativeInfinity: {
3798 // We always use floor(value).
3799 break;
3800 }
3801 case FPPositiveInfinity: {
3802 // Take care of correctly handling the range ]-1.0, -0.0], which must
3803 // yield -0.0.
3804 if ((-1.0 < value) && (value < 0.0)) {
3805 int_result = -0.0;
3806
3807 // If the error is non-zero, round up.
3808 } else if (error > 0.0) {
3809 int_result++;
3810 }
3811 break;
3812 }
3813 default: VIXL_UNIMPLEMENTED();
3814 }
3815 return int_result;
3816 }
3817
3818
FPToInt32(double value,FPRounding rmode)3819 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
3820 value = FPRoundInt(value, rmode);
3821 if (value >= kWMaxInt) {
3822 return kWMaxInt;
3823 } else if (value < kWMinInt) {
3824 return kWMinInt;
3825 }
3826 return std::isnan(value) ? 0 : static_cast<int32_t>(value);
3827 }
3828
3829
FPToInt64(double value,FPRounding rmode)3830 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
3831 value = FPRoundInt(value, rmode);
3832 if (value >= kXMaxInt) {
3833 return kXMaxInt;
3834 } else if (value < kXMinInt) {
3835 return kXMinInt;
3836 }
3837 return std::isnan(value) ? 0 : static_cast<int64_t>(value);
3838 }
3839
3840
FPToUInt32(double value,FPRounding rmode)3841 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
3842 value = FPRoundInt(value, rmode);
3843 if (value >= kWMaxUInt) {
3844 return kWMaxUInt;
3845 } else if (value < 0.0) {
3846 return 0;
3847 }
3848 return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
3849 }
3850
3851
FPToUInt64(double value,FPRounding rmode)3852 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
3853 value = FPRoundInt(value, rmode);
3854 if (value >= kXMaxUInt) {
3855 return kXMaxUInt;
3856 } else if (value < 0.0) {
3857 return 0;
3858 }
3859 return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
3860 }
3861
3862
// Defines, for each (FN, OP, PROCNAN) entry of NEON_FP3SAME_LIST, a lane-wise
// two-operand floating-point vector operation:
//
//  - a templated FN<T> that prepares dst for writing, then for every lane
//    reads both source lanes as T and stores OP(op1, op2) into dst. When
//    PROCNAN is true the operands first go through FPProcessNaNs, and OP is
//    only applied if that did not produce a NaN; otherwise OP sees the raw
//    operands and has to deal with NaNs itself.
//  - a non-template FN that picks the float instantiation for S-sized lanes
//    and the double instantiation for D-sized lanes (asserted).
//
// No comments are placed inside the macro body because of the line
// continuations.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \
template <typename T> \
LogicVRegister Simulator::FN(VectorFormat vform, \
LogicVRegister dst, \
const LogicVRegister& src1, \
const LogicVRegister& src2) { \
dst.ClearForWrite(vform); \
for (int i = 0; i < LaneCountFromFormat(vform); i++) { \
T op1 = src1.Float<T>(i); \
T op2 = src2.Float<T>(i); \
T result; \
if (PROCNAN) { \
result = FPProcessNaNs(op1, op2); \
if (!std::isnan(result)) { \
result = OP(op1, op2); \
} \
} else { \
result = OP(op1, op2); \
} \
dst.SetFloat(i, result); \
} \
return dst; \
} \
\
LogicVRegister Simulator::FN(VectorFormat vform, \
LogicVRegister dst, \
const LogicVRegister& src1, \
const LogicVRegister& src2) { \
if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { \
FN<float>(vform, dst, src1, src2); \
} else { \
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \
FN<double>(vform, dst, src1, src2); \
} \
return dst; \
}
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
3901
3902
3903 LogicVRegister Simulator::fnmul(VectorFormat vform,
3904 LogicVRegister dst,
3905 const LogicVRegister& src1,
3906 const LogicVRegister& src2) {
3907 SimVRegister temp;
3908 LogicVRegister product = fmul(vform, temp, src1, src2);
3909 return fneg(vform, dst, product);
3910 }
3911
3912
3913 template <typename T>
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3914 LogicVRegister Simulator::frecps(VectorFormat vform,
3915 LogicVRegister dst,
3916 const LogicVRegister& src1,
3917 const LogicVRegister& src2) {
3918 dst.ClearForWrite(vform);
3919 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3920 T op1 = -src1.Float<T>(i);
3921 T op2 = src2.Float<T>(i);
3922 T result = FPProcessNaNs(op1, op2);
3923 dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
3924 }
3925 return dst;
3926 }
3927
3928
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3929 LogicVRegister Simulator::frecps(VectorFormat vform,
3930 LogicVRegister dst,
3931 const LogicVRegister& src1,
3932 const LogicVRegister& src2) {
3933 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
3934 frecps<float>(vform, dst, src1, src2);
3935 } else {
3936 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
3937 frecps<double>(vform, dst, src1, src2);
3938 }
3939 return dst;
3940 }
3941
3942
3943 template <typename T>
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3944 LogicVRegister Simulator::frsqrts(VectorFormat vform,
3945 LogicVRegister dst,
3946 const LogicVRegister& src1,
3947 const LogicVRegister& src2) {
3948 dst.ClearForWrite(vform);
3949 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3950 T op1 = -src1.Float<T>(i);
3951 T op2 = src2.Float<T>(i);
3952 T result = FPProcessNaNs(op1, op2);
3953 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
3954 }
3955 return dst;
3956 }
3957
3958
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3959 LogicVRegister Simulator::frsqrts(VectorFormat vform,
3960 LogicVRegister dst,
3961 const LogicVRegister& src1,
3962 const LogicVRegister& src2) {
3963 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
3964 frsqrts<float>(vform, dst, src1, src2);
3965 } else {
3966 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
3967 frsqrts<double>(vform, dst, src1, src2);
3968 }
3969 return dst;
3970 }
3971
3972
3973 template <typename T>
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)3974 LogicVRegister Simulator::fcmp(VectorFormat vform,
3975 LogicVRegister dst,
3976 const LogicVRegister& src1,
3977 const LogicVRegister& src2,
3978 Condition cond) {
3979 dst.ClearForWrite(vform);
3980 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3981 bool result = false;
3982 T op1 = src1.Float<T>(i);
3983 T op2 = src2.Float<T>(i);
3984 T nan_result = FPProcessNaNs(op1, op2);
3985 if (!std::isnan(nan_result)) {
3986 switch (cond) {
3987 case eq: result = (op1 == op2); break;
3988 case ge: result = (op1 >= op2); break;
3989 case gt: result = (op1 > op2) ; break;
3990 case le: result = (op1 <= op2); break;
3991 case lt: result = (op1 < op2) ; break;
3992 default: VIXL_UNREACHABLE(); break;
3993 }
3994 }
3995 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
3996 }
3997 return dst;
3998 }
3999
4000
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)4001 LogicVRegister Simulator::fcmp(VectorFormat vform,
4002 LogicVRegister dst,
4003 const LogicVRegister& src1,
4004 const LogicVRegister& src2,
4005 Condition cond) {
4006 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4007 fcmp<float>(vform, dst, src1, src2, cond);
4008 } else {
4009 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4010 fcmp<double>(vform, dst, src1, src2, cond);
4011 }
4012 return dst;
4013 }
4014
4015
fcmp_zero(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,Condition cond)4016 LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
4017 LogicVRegister dst,
4018 const LogicVRegister& src,
4019 Condition cond) {
4020 SimVRegister temp;
4021 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4022 LogicVRegister zero_reg = dup_immediate(vform, temp, float_to_rawbits(0.0));
4023 fcmp<float>(vform, dst, src, zero_reg, cond);
4024 } else {
4025 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4026 LogicVRegister zero_reg = dup_immediate(vform, temp,
4027 double_to_rawbits(0.0));
4028 fcmp<double>(vform, dst, src, zero_reg, cond);
4029 }
4030 return dst;
4031 }
4032
4033
fabscmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)4034 LogicVRegister Simulator::fabscmp(VectorFormat vform,
4035 LogicVRegister dst,
4036 const LogicVRegister& src1,
4037 const LogicVRegister& src2,
4038 Condition cond) {
4039 SimVRegister temp1, temp2;
4040 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4041 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
4042 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
4043 fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
4044 } else {
4045 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4046 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
4047 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
4048 fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
4049 }
4050 return dst;
4051 }
4052
4053
4054 template <typename T>
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4055 LogicVRegister Simulator::fmla(VectorFormat vform,
4056 LogicVRegister dst,
4057 const LogicVRegister& src1,
4058 const LogicVRegister& src2) {
4059 dst.ClearForWrite(vform);
4060 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4061 T op1 = src1.Float<T>(i);
4062 T op2 = src2.Float<T>(i);
4063 T acc = dst.Float<T>(i);
4064 T result = FPMulAdd(acc, op1, op2);
4065 dst.SetFloat(i, result);
4066 }
4067 return dst;
4068 }
4069
4070
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4071 LogicVRegister Simulator::fmla(VectorFormat vform,
4072 LogicVRegister dst,
4073 const LogicVRegister& src1,
4074 const LogicVRegister& src2) {
4075 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4076 fmla<float>(vform, dst, src1, src2);
4077 } else {
4078 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4079 fmla<double>(vform, dst, src1, src2);
4080 }
4081 return dst;
4082 }
4083
4084
4085 template <typename T>
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4086 LogicVRegister Simulator::fmls(VectorFormat vform,
4087 LogicVRegister dst,
4088 const LogicVRegister& src1,
4089 const LogicVRegister& src2) {
4090 dst.ClearForWrite(vform);
4091 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4092 T op1 = -src1.Float<T>(i);
4093 T op2 = src2.Float<T>(i);
4094 T acc = dst.Float<T>(i);
4095 T result = FPMulAdd(acc, op1, op2);
4096 dst.SetFloat(i, result);
4097 }
4098 return dst;
4099 }
4100
4101
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4102 LogicVRegister Simulator::fmls(VectorFormat vform,
4103 LogicVRegister dst,
4104 const LogicVRegister& src1,
4105 const LogicVRegister& src2) {
4106 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4107 fmls<float>(vform, dst, src1, src2);
4108 } else {
4109 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4110 fmls<double>(vform, dst, src1, src2);
4111 }
4112 return dst;
4113 }
4114
4115
4116 template <typename T>
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4117 LogicVRegister Simulator::fneg(VectorFormat vform,
4118 LogicVRegister dst,
4119 const LogicVRegister& src) {
4120 dst.ClearForWrite(vform);
4121 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4122 T op = src.Float<T>(i);
4123 op = -op;
4124 dst.SetFloat(i, op);
4125 }
4126 return dst;
4127 }
4128
4129
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4130 LogicVRegister Simulator::fneg(VectorFormat vform,
4131 LogicVRegister dst,
4132 const LogicVRegister& src) {
4133 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4134 fneg<float>(vform, dst, src);
4135 } else {
4136 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4137 fneg<double>(vform, dst, src);
4138 }
4139 return dst;
4140 }
4141
4142
4143 template <typename T>
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4144 LogicVRegister Simulator::fabs_(VectorFormat vform,
4145 LogicVRegister dst,
4146 const LogicVRegister& src) {
4147 dst.ClearForWrite(vform);
4148 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4149 T op = src.Float<T>(i);
4150 if (copysign(1.0, op) < 0.0) {
4151 op = -op;
4152 }
4153 dst.SetFloat(i, op);
4154 }
4155 return dst;
4156 }
4157
4158
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4159 LogicVRegister Simulator::fabs_(VectorFormat vform,
4160 LogicVRegister dst,
4161 const LogicVRegister& src) {
4162 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4163 fabs_<float>(vform, dst, src);
4164 } else {
4165 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4166 fabs_<double>(vform, dst, src);
4167 }
4168 return dst;
4169 }
4170
4171
fabd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4172 LogicVRegister Simulator::fabd(VectorFormat vform,
4173 LogicVRegister dst,
4174 const LogicVRegister& src1,
4175 const LogicVRegister& src2) {
4176 SimVRegister temp;
4177 fsub(vform, temp, src1, src2);
4178 fabs_(vform, dst, temp);
4179 return dst;
4180 }
4181
4182
fsqrt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4183 LogicVRegister Simulator::fsqrt(VectorFormat vform,
4184 LogicVRegister dst,
4185 const LogicVRegister& src) {
4186 dst.ClearForWrite(vform);
4187 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4188 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4189 float result = FPSqrt(src.Float<float>(i));
4190 dst.SetFloat(i, result);
4191 }
4192 } else {
4193 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4194 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4195 double result = FPSqrt(src.Float<double>(i));
4196 dst.SetFloat(i, result);
4197 }
4198 }
4199 return dst;
4200 }
4201
4202
// Defines, for each (FNP, FN, OP) entry of NEON_FPPAIRWISE_LIST, the two forms
// of a pairwise floating-point operation:
//
//  - a vector form taking two sources: uzp1/uzp2 split src1 and src2 into two
//    scratch registers, which are then combined lane-wise with FN.
//  - a scalar form taking one source: OP is applied to lanes 0 and 1 of src
//    and the result is written to lane 0 of dst. Only kFormatS (float) and
//    kFormatD (double, asserted) are accepted. dst.ClearForWrite() is called
//    only after both source lanes have been read and the result stored.
//    NOTE(review): presumably so that a dst aliasing src does not lose lane 1
//    before it is read -- confirm against ClearForWrite.
//
// No comments are placed inside the macro body because of the line
// continuations.
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \
LogicVRegister Simulator::FNP(VectorFormat vform, \
LogicVRegister dst, \
const LogicVRegister& src1, \
const LogicVRegister& src2) { \
SimVRegister temp1, temp2; \
uzp1(vform, temp1, src1, src2); \
uzp2(vform, temp2, src1, src2); \
FN(vform, dst, temp1, temp2); \
return dst; \
} \
\
LogicVRegister Simulator::FNP(VectorFormat vform, \
LogicVRegister dst, \
const LogicVRegister& src) { \
if (vform == kFormatS) { \
float result = OP(src.Float<float>(0), src.Float<float>(1)); \
dst.SetFloat(0, result); \
} else { \
VIXL_ASSERT(vform == kFormatD); \
double result = OP(src.Float<double>(0), src.Float<double>(1)); \
dst.SetFloat(0, result); \
} \
dst.ClearForWrite(vform); \
return dst; \
}
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
4231
4232
4233 LogicVRegister Simulator::fminmaxv(VectorFormat vform,
4234 LogicVRegister dst,
4235 const LogicVRegister& src,
4236 FPMinMaxOp Op) {
4237 VIXL_ASSERT(vform == kFormat4S);
4238 USE(vform);
4239 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
4240 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
4241 float result = (this->*Op)(result1, result2);
4242 dst.ClearForWrite(kFormatS);
4243 dst.SetFloat<float>(0, result);
4244 return dst;
4245 }
4246
4247
fmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4248 LogicVRegister Simulator::fmaxv(VectorFormat vform,
4249 LogicVRegister dst,
4250 const LogicVRegister& src) {
4251 return fminmaxv(vform, dst, src, &Simulator::FPMax);
4252 }
4253
4254
fminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4255 LogicVRegister Simulator::fminv(VectorFormat vform,
4256 LogicVRegister dst,
4257 const LogicVRegister& src) {
4258 return fminmaxv(vform, dst, src, &Simulator::FPMin);
4259 }
4260
4261
fmaxnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4262 LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
4263 LogicVRegister dst,
4264 const LogicVRegister& src) {
4265 return fminmaxv(vform, dst, src, &Simulator::FPMaxNM);
4266 }
4267
4268
fminnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4269 LogicVRegister Simulator::fminnmv(VectorFormat vform,
4270 LogicVRegister dst,
4271 const LogicVRegister& src) {
4272 return fminmaxv(vform, dst, src, &Simulator::FPMinNM);
4273 }
4274
4275
fmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4276 LogicVRegister Simulator::fmul(VectorFormat vform,
4277 LogicVRegister dst,
4278 const LogicVRegister& src1,
4279 const LogicVRegister& src2,
4280 int index) {
4281 dst.ClearForWrite(vform);
4282 SimVRegister temp;
4283 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4284 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4285 fmul<float>(vform, dst, src1, index_reg);
4286
4287 } else {
4288 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4289 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4290 fmul<double>(vform, dst, src1, index_reg);
4291 }
4292 return dst;
4293 }
4294
4295
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4296 LogicVRegister Simulator::fmla(VectorFormat vform,
4297 LogicVRegister dst,
4298 const LogicVRegister& src1,
4299 const LogicVRegister& src2,
4300 int index) {
4301 dst.ClearForWrite(vform);
4302 SimVRegister temp;
4303 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4304 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4305 fmla<float>(vform, dst, src1, index_reg);
4306
4307 } else {
4308 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4309 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4310 fmla<double>(vform, dst, src1, index_reg);
4311 }
4312 return dst;
4313 }
4314
4315
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4316 LogicVRegister Simulator::fmls(VectorFormat vform,
4317 LogicVRegister dst,
4318 const LogicVRegister& src1,
4319 const LogicVRegister& src2,
4320 int index) {
4321 dst.ClearForWrite(vform);
4322 SimVRegister temp;
4323 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4324 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4325 fmls<float>(vform, dst, src1, index_reg);
4326
4327 } else {
4328 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4329 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4330 fmls<double>(vform, dst, src1, index_reg);
4331 }
4332 return dst;
4333 }
4334
4335
fmulx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4336 LogicVRegister Simulator::fmulx(VectorFormat vform,
4337 LogicVRegister dst,
4338 const LogicVRegister& src1,
4339 const LogicVRegister& src2,
4340 int index) {
4341 dst.ClearForWrite(vform);
4342 SimVRegister temp;
4343 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4344 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4345 fmulx<float>(vform, dst, src1, index_reg);
4346
4347 } else {
4348 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4349 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4350 fmulx<double>(vform, dst, src1, index_reg);
4351 }
4352 return dst;
4353 }
4354
4355
frint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,bool inexact_exception)4356 LogicVRegister Simulator::frint(VectorFormat vform,
4357 LogicVRegister dst,
4358 const LogicVRegister& src,
4359 FPRounding rounding_mode,
4360 bool inexact_exception) {
4361 dst.ClearForWrite(vform);
4362 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4363 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4364 float input = src.Float<float>(i);
4365 float rounded = FPRoundInt(input, rounding_mode);
4366 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
4367 FPProcessException();
4368 }
4369 dst.SetFloat<float>(i, rounded);
4370 }
4371 } else {
4372 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4373 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4374 double input = src.Float<double>(i);
4375 double rounded = FPRoundInt(input, rounding_mode);
4376 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
4377 FPProcessException();
4378 }
4379 dst.SetFloat<double>(i, rounded);
4380 }
4381 }
4382 return dst;
4383 }
4384
4385
fcvts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,int fbits)4386 LogicVRegister Simulator::fcvts(VectorFormat vform,
4387 LogicVRegister dst,
4388 const LogicVRegister& src,
4389 FPRounding rounding_mode,
4390 int fbits) {
4391 dst.ClearForWrite(vform);
4392 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4393 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4394 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
4395 dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
4396 }
4397 } else {
4398 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4399 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4400 double op = src.Float<double>(i) * std::pow(2.0, fbits);
4401 dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
4402 }
4403 }
4404 return dst;
4405 }
4406
4407
fcvtu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,int fbits)4408 LogicVRegister Simulator::fcvtu(VectorFormat vform,
4409 LogicVRegister dst,
4410 const LogicVRegister& src,
4411 FPRounding rounding_mode,
4412 int fbits) {
4413 dst.ClearForWrite(vform);
4414 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4415 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4416 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
4417 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
4418 }
4419 } else {
4420 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4421 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4422 double op = src.Float<double>(i) * std::pow(2.0, fbits);
4423 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
4424 }
4425 }
4426 return dst;
4427 }
4428
4429
fcvtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4430 LogicVRegister Simulator::fcvtl(VectorFormat vform,
4431 LogicVRegister dst,
4432 const LogicVRegister& src) {
4433 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4434 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
4435 dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
4436 }
4437 } else {
4438 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4439 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
4440 dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
4441 }
4442 }
4443 return dst;
4444 }
4445
4446
fcvtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4447 LogicVRegister Simulator::fcvtl2(VectorFormat vform,
4448 LogicVRegister dst,
4449 const LogicVRegister& src) {
4450 int lane_count = LaneCountFromFormat(vform);
4451 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4452 for (int i = 0; i < lane_count; i++) {
4453 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
4454 }
4455 } else {
4456 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4457 for (int i = 0; i < lane_count; i++) {
4458 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
4459 }
4460 }
4461 return dst;
4462 }
4463
4464
fcvtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4465 LogicVRegister Simulator::fcvtn(VectorFormat vform,
4466 LogicVRegister dst,
4467 const LogicVRegister& src) {
4468 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4469 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4470 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
4471 }
4472 } else {
4473 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4474 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4475 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
4476 }
4477 }
4478 return dst;
4479 }
4480
4481
fcvtn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4482 LogicVRegister Simulator::fcvtn2(VectorFormat vform,
4483 LogicVRegister dst,
4484 const LogicVRegister& src) {
4485 int lane_count = LaneCountFromFormat(vform) / 2;
4486 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4487 for (int i = lane_count - 1; i >= 0; i--) {
4488 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
4489 }
4490 } else {
4491 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4492 for (int i = lane_count - 1; i >= 0; i--) {
4493 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
4494 }
4495 }
4496 return dst;
4497 }
4498
4499
fcvtxn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4500 LogicVRegister Simulator::fcvtxn(VectorFormat vform,
4501 LogicVRegister dst,
4502 const LogicVRegister& src) {
4503 dst.ClearForWrite(vform);
4504 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4505 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4506 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
4507 }
4508 return dst;
4509 }
4510
4511
fcvtxn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4512 LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
4513 LogicVRegister dst,
4514 const LogicVRegister& src) {
4515 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4516 int lane_count = LaneCountFromFormat(vform) / 2;
4517 for (int i = lane_count - 1; i >= 0; i--) {
4518 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
4519 }
4520 return dst;
4521 }
4522
4523
4524 // Based on reference C function recip_sqrt_estimate from ARM ARM.
recip_sqrt_estimate(double a)4525 double Simulator::recip_sqrt_estimate(double a) {
4526 int q0, q1, s;
4527 double r;
4528 if (a < 0.5) {
4529 q0 = static_cast<int>(a * 512.0);
4530 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
4531 } else {
4532 q1 = static_cast<int>(a * 256.0);
4533 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
4534 }
4535 s = static_cast<int>(256.0 * r + 0.5);
4536 return static_cast<double>(s) / 256.0;
4537 }
4538
4539
Bits(uint64_t val,int start_bit,int end_bit)4540 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
4541 return unsigned_bitextract_64(start_bit, end_bit, val);
4542 }
4543
4544
4545 template <typename T>
FPRecipSqrtEstimate(T op)4546 T Simulator::FPRecipSqrtEstimate(T op) {
4547 if (std::isnan(op)) {
4548 return FPProcessNaN(op);
4549 } else if (op == 0.0) {
4550 if (copysign(1.0, op) < 0.0) {
4551 return kFP64NegativeInfinity;
4552 } else {
4553 return kFP64PositiveInfinity;
4554 }
4555 } else if (copysign(1.0, op) < 0.0) {
4556 FPProcessException();
4557 return FPDefaultNaN<T>();
4558 } else if (std::isinf(op)) {
4559 return 0.0;
4560 } else {
4561 uint64_t fraction;
4562 int exp, result_exp;
4563
4564 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4565 exp = float_exp(op);
4566 fraction = float_mantissa(op);
4567 fraction <<= 29;
4568 } else {
4569 exp = double_exp(op);
4570 fraction = double_mantissa(op);
4571 }
4572
4573 if (exp == 0) {
4574 while (Bits(fraction, 51, 51) == 0) {
4575 fraction = Bits(fraction, 50, 0) << 1;
4576 exp -= 1;
4577 }
4578 fraction = Bits(fraction, 50, 0) << 1;
4579 }
4580
4581 double scaled;
4582 if (Bits(exp, 0, 0) == 0) {
4583 scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
4584 } else {
4585 scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44);
4586 }
4587
4588 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4589 result_exp = (380 - exp) / 2;
4590 } else {
4591 result_exp = (3068 - exp) / 2;
4592 }
4593
4594 uint64_t estimate = double_to_rawbits(recip_sqrt_estimate(scaled));
4595
4596 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4597 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4598 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
4599 return float_pack(0, exp_bits, est_bits);
4600 } else {
4601 return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
4602 }
4603 }
4604 }
4605
4606
frsqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4607 LogicVRegister Simulator::frsqrte(VectorFormat vform,
4608 LogicVRegister dst,
4609 const LogicVRegister& src) {
4610 dst.ClearForWrite(vform);
4611 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4612 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4613 float input = src.Float<float>(i);
4614 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
4615 }
4616 } else {
4617 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4618 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4619 double input = src.Float<double>(i);
4620 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
4621 }
4622 }
4623 return dst;
4624 }
4625
4626 template <typename T>
FPRecipEstimate(T op,FPRounding rounding)4627 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
4628 uint32_t sign;
4629
4630 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4631 sign = float_sign(op);
4632 } else {
4633 sign = double_sign(op);
4634 }
4635
4636 if (std::isnan(op)) {
4637 return FPProcessNaN(op);
4638 } else if (std::isinf(op)) {
4639 return (sign == 1) ? -0.0 : 0.0;
4640 } else if (op == 0.0) {
4641 FPProcessException(); // FPExc_DivideByZero exception.
4642 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4643 } else if (((sizeof(T) == sizeof(float)) && // NOLINT(runtime/sizeof)
4644 (std::fabs(op) < std::pow(2.0, -128.0))) ||
4645 ((sizeof(T) == sizeof(double)) && // NOLINT(runtime/sizeof)
4646 (std::fabs(op) < std::pow(2.0, -1024.0)))) {
4647 bool overflow_to_inf = false;
4648 switch (rounding) {
4649 case FPTieEven: overflow_to_inf = true; break;
4650 case FPPositiveInfinity: overflow_to_inf = (sign == 0); break;
4651 case FPNegativeInfinity: overflow_to_inf = (sign == 1); break;
4652 case FPZero: overflow_to_inf = false; break;
4653 default: break;
4654 }
4655 FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
4656 if (overflow_to_inf) {
4657 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4658 } else {
4659 // Return FPMaxNormal(sign).
4660 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4661 return float_pack(sign, 0xfe, 0x07fffff);
4662 } else {
4663 return double_pack(sign, 0x7fe, 0x0fffffffffffffl);
4664 }
4665 }
4666 } else {
4667 uint64_t fraction;
4668 int exp, result_exp;
4669 uint32_t sign;
4670
4671 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4672 sign = float_sign(op);
4673 exp = float_exp(op);
4674 fraction = float_mantissa(op);
4675 fraction <<= 29;
4676 } else {
4677 sign = double_sign(op);
4678 exp = double_exp(op);
4679 fraction = double_mantissa(op);
4680 }
4681
4682 if (exp == 0) {
4683 if (Bits(fraction, 51, 51) == 0) {
4684 exp -= 1;
4685 fraction = Bits(fraction, 49, 0) << 2;
4686 } else {
4687 fraction = Bits(fraction, 50, 0) << 1;
4688 }
4689 }
4690
4691 double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
4692
4693 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4694 result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254.
4695 } else {
4696 result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046.
4697 }
4698
4699 double estimate = recip_estimate(scaled);
4700
4701 fraction = double_mantissa(estimate);
4702 if (result_exp == 0) {
4703 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
4704 } else if (result_exp == -1) {
4705 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
4706 result_exp = 0;
4707 }
4708 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4709 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4710 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
4711 return float_pack(sign, exp_bits, frac_bits);
4712 } else {
4713 return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
4714 }
4715 }
4716 }
4717
4718
frecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round)4719 LogicVRegister Simulator::frecpe(VectorFormat vform,
4720 LogicVRegister dst,
4721 const LogicVRegister& src,
4722 FPRounding round) {
4723 dst.ClearForWrite(vform);
4724 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4725 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4726 float input = src.Float<float>(i);
4727 dst.SetFloat(i, FPRecipEstimate<float>(input, round));
4728 }
4729 } else {
4730 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4731 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4732 double input = src.Float<double>(i);
4733 dst.SetFloat(i, FPRecipEstimate<double>(input, round));
4734 }
4735 }
4736 return dst;
4737 }
4738
4739
ursqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4740 LogicVRegister Simulator::ursqrte(VectorFormat vform,
4741 LogicVRegister dst,
4742 const LogicVRegister& src) {
4743 dst.ClearForWrite(vform);
4744 uint64_t operand;
4745 uint32_t result;
4746 double dp_operand, dp_result;
4747 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4748 operand = src.Uint(vform, i);
4749 if (operand <= 0x3FFFFFFF) {
4750 result = 0xFFFFFFFF;
4751 } else {
4752 dp_operand = operand * std::pow(2.0, -32);
4753 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
4754 result = static_cast<uint32_t>(dp_result);
4755 }
4756 dst.SetUint(vform, i, result);
4757 }
4758 return dst;
4759 }
4760
4761
4762 // Based on reference C function recip_estimate from ARM ARM.
recip_estimate(double a)4763 double Simulator::recip_estimate(double a) {
4764 int q, s;
4765 double r;
4766 q = static_cast<int>(a * 512.0);
4767 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
4768 s = static_cast<int>(256.0 * r + 0.5);
4769 return static_cast<double>(s) / 256.0;
4770 }
4771
4772
urecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4773 LogicVRegister Simulator::urecpe(VectorFormat vform,
4774 LogicVRegister dst,
4775 const LogicVRegister& src) {
4776 dst.ClearForWrite(vform);
4777 uint64_t operand;
4778 uint32_t result;
4779 double dp_operand, dp_result;
4780 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4781 operand = src.Uint(vform, i);
4782 if (operand <= 0x7FFFFFFF) {
4783 result = 0xFFFFFFFF;
4784 } else {
4785 dp_operand = operand * std::pow(2.0, -32);
4786 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
4787 result = static_cast<uint32_t>(dp_result);
4788 }
4789 dst.SetUint(vform, i, result);
4790 }
4791 return dst;
4792 }
4793
4794 template <typename T>
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4795 LogicVRegister Simulator::frecpx(VectorFormat vform,
4796 LogicVRegister dst,
4797 const LogicVRegister& src) {
4798 dst.ClearForWrite(vform);
4799 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4800 T op = src.Float<T>(i);
4801 T result;
4802 if (std::isnan(op)) {
4803 result = FPProcessNaN(op);
4804 } else {
4805 int exp;
4806 uint32_t sign;
4807 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4808 sign = float_sign(op);
4809 exp = float_exp(op);
4810 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
4811 result = float_pack(sign, exp, 0);
4812 } else {
4813 sign = double_sign(op);
4814 exp = double_exp(op);
4815 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
4816 result = double_pack(sign, exp, 0);
4817 }
4818 }
4819 dst.SetFloat(i, result);
4820 }
4821 return dst;
4822 }
4823
4824
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4825 LogicVRegister Simulator::frecpx(VectorFormat vform,
4826 LogicVRegister dst,
4827 const LogicVRegister& src) {
4828 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4829 frecpx<float>(vform, dst, src);
4830 } else {
4831 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4832 frecpx<double>(vform, dst, src);
4833 }
4834 return dst;
4835 }
4836
scvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)4837 LogicVRegister Simulator::scvtf(VectorFormat vform,
4838 LogicVRegister dst,
4839 const LogicVRegister& src,
4840 int fbits,
4841 FPRounding round) {
4842 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4843 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4844 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
4845 dst.SetFloat<float>(i, result);
4846 } else {
4847 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4848 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
4849 dst.SetFloat<double>(i, result);
4850 }
4851 }
4852 return dst;
4853 }
4854
4855
ucvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)4856 LogicVRegister Simulator::ucvtf(VectorFormat vform,
4857 LogicVRegister dst,
4858 const LogicVRegister& src,
4859 int fbits,
4860 FPRounding round) {
4861 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4862 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4863 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
4864 dst.SetFloat<float>(i, result);
4865 } else {
4866 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4867 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
4868 dst.SetFloat<double>(i, result);
4869 }
4870 }
4871 return dst;
4872 }
4873
4874
4875 } // namespace vixl
4876
4877 #endif // VIXL_INCLUDE_SIMULATOR
4878