1 // Copyright 2015, ARM Limited
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #include <cmath>
28 #include "vixl/a64/simulator-a64.h"
29
30 namespace vixl {
31
FPDefaultNaN()32 template<> double Simulator::FPDefaultNaN<double>() {
33 return kFP64DefaultNaN;
34 }
35
36
FPDefaultNaN()37 template<> float Simulator::FPDefaultNaN<float>() {
38 return kFP32DefaultNaN;
39 }
40
41 // See FPRound for a description of this function.
FPRoundToDouble(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)42 static inline double FPRoundToDouble(int64_t sign, int64_t exponent,
43 uint64_t mantissa, FPRounding round_mode) {
44 int64_t bits =
45 FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
46 exponent,
47 mantissa,
48 round_mode);
49 return rawbits_to_double(bits);
50 }
51
52
53 // See FPRound for a description of this function.
FPRoundToFloat(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)54 static inline float FPRoundToFloat(int64_t sign, int64_t exponent,
55 uint64_t mantissa, FPRounding round_mode) {
56 int32_t bits =
57 FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
58 exponent,
59 mantissa,
60 round_mode);
61 return rawbits_to_float(bits);
62 }
63
64
65 // See FPRound for a description of this function.
FPRoundToFloat16(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)66 static inline float16 FPRoundToFloat16(int64_t sign,
67 int64_t exponent,
68 uint64_t mantissa,
69 FPRounding round_mode) {
70 return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
71 sign, exponent, mantissa, round_mode);
72 }
73
74
FixedToDouble(int64_t src,int fbits,FPRounding round)75 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
76 if (src >= 0) {
77 return UFixedToDouble(src, fbits, round);
78 } else {
79 // This works for all negative values, including INT64_MIN.
80 return -UFixedToDouble(-src, fbits, round);
81 }
82 }
83
84
UFixedToDouble(uint64_t src,int fbits,FPRounding round)85 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
86 // An input of 0 is a special case because the result is effectively
87 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
88 if (src == 0) {
89 return 0.0;
90 }
91
92 // Calculate the exponent. The highest significant bit will have the value
93 // 2^exponent.
94 const int highest_significant_bit = 63 - CountLeadingZeros(src);
95 const int64_t exponent = highest_significant_bit - fbits;
96
97 return FPRoundToDouble(0, exponent, src, round);
98 }
99
100
FixedToFloat(int64_t src,int fbits,FPRounding round)101 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
102 if (src >= 0) {
103 return UFixedToFloat(src, fbits, round);
104 } else {
105 // This works for all negative values, including INT64_MIN.
106 return -UFixedToFloat(-src, fbits, round);
107 }
108 }
109
110
UFixedToFloat(uint64_t src,int fbits,FPRounding round)111 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
112 // An input of 0 is a special case because the result is effectively
113 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
114 if (src == 0) {
115 return 0.0f;
116 }
117
118 // Calculate the exponent. The highest significant bit will have the value
119 // 2^exponent.
120 const int highest_significant_bit = 63 - CountLeadingZeros(src);
121 const int32_t exponent = highest_significant_bit - fbits;
122
123 return FPRoundToFloat(0, exponent, src, round);
124 }
125
126
FPToDouble(float value)127 double Simulator::FPToDouble(float value) {
128 switch (std::fpclassify(value)) {
129 case FP_NAN: {
130 if (IsSignallingNaN(value)) {
131 FPProcessException();
132 }
133 if (DN()) return kFP64DefaultNaN;
134
135 // Convert NaNs as the processor would:
136 // - The sign is propagated.
137 // - The payload (mantissa) is transferred entirely, except that the top
138 // bit is forced to '1', making the result a quiet NaN. The unused
139 // (low-order) payload bits are set to 0.
140 uint32_t raw = float_to_rawbits(value);
141
142 uint64_t sign = raw >> 31;
143 uint64_t exponent = (1 << 11) - 1;
144 uint64_t payload = unsigned_bitextract_64(21, 0, raw);
145 payload <<= (52 - 23); // The unused low-order bits should be 0.
146 payload |= (UINT64_C(1) << 51); // Force a quiet NaN.
147
148 return rawbits_to_double((sign << 63) | (exponent << 52) | payload);
149 }
150
151 case FP_ZERO:
152 case FP_NORMAL:
153 case FP_SUBNORMAL:
154 case FP_INFINITE: {
155 // All other inputs are preserved in a standard cast, because every value
156 // representable using an IEEE-754 float is also representable using an
157 // IEEE-754 double.
158 return static_cast<double>(value);
159 }
160 }
161
162 VIXL_UNREACHABLE();
163 return static_cast<double>(value);
164 }
165
166
FPToFloat(float16 value)167 float Simulator::FPToFloat(float16 value) {
168 uint32_t sign = value >> 15;
169 uint32_t exponent = unsigned_bitextract_32(
170 kFloat16MantissaBits + kFloat16ExponentBits - 1, kFloat16MantissaBits,
171 value);
172 uint32_t mantissa = unsigned_bitextract_32(
173 kFloat16MantissaBits - 1, 0, value);
174
175 switch (float16classify(value)) {
176 case FP_ZERO:
177 return (sign == 0) ? 0.0f : -0.0f;
178
179 case FP_INFINITE:
180 return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
181
182 case FP_SUBNORMAL: {
183 // Calculate shift required to put mantissa into the most-significant bits
184 // of the destination mantissa.
185 int shift = CountLeadingZeros(mantissa << (32 - 10));
186
187 // Shift mantissa and discard implicit '1'.
188 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
189 mantissa &= (1 << kFloatMantissaBits) - 1;
190
191 // Adjust the exponent for the shift applied, and rebias.
192 exponent = exponent - shift + (-15 + 127);
193 break;
194 }
195
196 case FP_NAN:
197 if (IsSignallingNaN(value)) {
198 FPProcessException();
199 }
200 if (DN()) return kFP32DefaultNaN;
201
202 // Convert NaNs as the processor would:
203 // - The sign is propagated.
204 // - The payload (mantissa) is transferred entirely, except that the top
205 // bit is forced to '1', making the result a quiet NaN. The unused
206 // (low-order) payload bits are set to 0.
207 exponent = (1 << kFloatExponentBits) - 1;
208
209 // Increase bits in mantissa, making low-order bits 0.
210 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
211 mantissa |= 1 << 22; // Force a quiet NaN.
212 break;
213
214 case FP_NORMAL:
215 // Increase bits in mantissa, making low-order bits 0.
216 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
217
218 // Change exponent bias.
219 exponent += (-15 + 127);
220 break;
221
222 default: VIXL_UNREACHABLE();
223 }
224 return rawbits_to_float((sign << 31) |
225 (exponent << kFloatMantissaBits) |
226 mantissa);
227 }
228
229
FPToFloat16(float value,FPRounding round_mode)230 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
231 // Only the FPTieEven rounding mode is implemented.
232 VIXL_ASSERT(round_mode == FPTieEven);
233 USE(round_mode);
234
235 uint32_t raw = float_to_rawbits(value);
236 int32_t sign = raw >> 31;
237 int32_t exponent = unsigned_bitextract_32(30, 23, raw) - 127;
238 uint32_t mantissa = unsigned_bitextract_32(22, 0, raw);
239
240 switch (std::fpclassify(value)) {
241 case FP_NAN: {
242 if (IsSignallingNaN(value)) {
243 FPProcessException();
244 }
245 if (DN()) return kFP16DefaultNaN;
246
247 // Convert NaNs as the processor would:
248 // - The sign is propagated.
249 // - The payload (mantissa) is transferred as much as possible, except
250 // that the top bit is forced to '1', making the result a quiet NaN.
251 float16 result = (sign == 0) ? kFP16PositiveInfinity
252 : kFP16NegativeInfinity;
253 result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
254 result |= (1 << 9); // Force a quiet NaN;
255 return result;
256 }
257
258 case FP_ZERO:
259 return (sign == 0) ? 0 : 0x8000;
260
261 case FP_INFINITE:
262 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
263
264 case FP_NORMAL:
265 case FP_SUBNORMAL: {
266 // Convert float-to-half as the processor would, assuming that FPCR.FZ
267 // (flush-to-zero) is not set.
268
269 // Add the implicit '1' bit to the mantissa.
270 mantissa += (1 << 23);
271 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
272 }
273 }
274
275 VIXL_UNREACHABLE();
276 return 0;
277 }
278
279
FPToFloat16(double value,FPRounding round_mode)280 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
281 // Only the FPTieEven rounding mode is implemented.
282 VIXL_ASSERT(round_mode == FPTieEven);
283 USE(round_mode);
284
285 uint64_t raw = double_to_rawbits(value);
286 int32_t sign = raw >> 63;
287 int64_t exponent = unsigned_bitextract_64(62, 52, raw) - 1023;
288 uint64_t mantissa = unsigned_bitextract_64(51, 0, raw);
289
290 switch (std::fpclassify(value)) {
291 case FP_NAN: {
292 if (IsSignallingNaN(value)) {
293 FPProcessException();
294 }
295 if (DN()) return kFP16DefaultNaN;
296
297 // Convert NaNs as the processor would:
298 // - The sign is propagated.
299 // - The payload (mantissa) is transferred as much as possible, except
300 // that the top bit is forced to '1', making the result a quiet NaN.
301 float16 result = (sign == 0) ? kFP16PositiveInfinity
302 : kFP16NegativeInfinity;
303 result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
304 result |= (1 << 9); // Force a quiet NaN;
305 return result;
306 }
307
308 case FP_ZERO:
309 return (sign == 0) ? 0 : 0x8000;
310
311 case FP_INFINITE:
312 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
313
314 case FP_NORMAL:
315 case FP_SUBNORMAL: {
316 // Convert double-to-half as the processor would, assuming that FPCR.FZ
317 // (flush-to-zero) is not set.
318
319 // Add the implicit '1' bit to the mantissa.
320 mantissa += (UINT64_C(1) << 52);
321 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
322 }
323 }
324
325 VIXL_UNREACHABLE();
326 return 0;
327 }
328
329
FPToFloat(double value,FPRounding round_mode)330 float Simulator::FPToFloat(double value, FPRounding round_mode) {
331 // Only the FPTieEven rounding mode is implemented.
332 VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
333 USE(round_mode);
334
335 switch (std::fpclassify(value)) {
336 case FP_NAN: {
337 if (IsSignallingNaN(value)) {
338 FPProcessException();
339 }
340 if (DN()) return kFP32DefaultNaN;
341
342 // Convert NaNs as the processor would:
343 // - The sign is propagated.
344 // - The payload (mantissa) is transferred as much as possible, except
345 // that the top bit is forced to '1', making the result a quiet NaN.
346 uint64_t raw = double_to_rawbits(value);
347
348 uint32_t sign = raw >> 63;
349 uint32_t exponent = (1 << 8) - 1;
350 uint32_t payload = unsigned_bitextract_64(50, 52 - 23, raw);
351 payload |= (1 << 22); // Force a quiet NaN.
352
353 return rawbits_to_float((sign << 31) | (exponent << 23) | payload);
354 }
355
356 case FP_ZERO:
357 case FP_INFINITE: {
358 // In a C++ cast, any value representable in the target type will be
359 // unchanged. This is always the case for +/-0.0 and infinities.
360 return static_cast<float>(value);
361 }
362
363 case FP_NORMAL:
364 case FP_SUBNORMAL: {
365 // Convert double-to-float as the processor would, assuming that FPCR.FZ
366 // (flush-to-zero) is not set.
367 uint64_t raw = double_to_rawbits(value);
368 // Extract the IEEE-754 double components.
369 uint32_t sign = raw >> 63;
370 // Extract the exponent and remove the IEEE-754 encoding bias.
371 int32_t exponent = unsigned_bitextract_64(62, 52, raw) - 1023;
372 // Extract the mantissa and add the implicit '1' bit.
373 uint64_t mantissa = unsigned_bitextract_64(51, 0, raw);
374 if (std::fpclassify(value) == FP_NORMAL) {
375 mantissa |= (UINT64_C(1) << 52);
376 }
377 return FPRoundToFloat(sign, exponent, mantissa, round_mode);
378 }
379 }
380
381 VIXL_UNREACHABLE();
382 return value;
383 }
384
385
ld1(VectorFormat vform,LogicVRegister dst,uint64_t addr)386 void Simulator::ld1(VectorFormat vform,
387 LogicVRegister dst,
388 uint64_t addr) {
389 dst.ClearForWrite(vform);
390 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
391 dst.ReadUintFromMem(vform, i, addr);
392 addr += LaneSizeInBytesFromFormat(vform);
393 }
394 }
395
396
ld1(VectorFormat vform,LogicVRegister dst,int index,uint64_t addr)397 void Simulator::ld1(VectorFormat vform,
398 LogicVRegister dst,
399 int index,
400 uint64_t addr) {
401 dst.ReadUintFromMem(vform, index, addr);
402 }
403
404
ld1r(VectorFormat vform,LogicVRegister dst,uint64_t addr)405 void Simulator::ld1r(VectorFormat vform,
406 LogicVRegister dst,
407 uint64_t addr) {
408 dst.ClearForWrite(vform);
409 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
410 dst.ReadUintFromMem(vform, i, addr);
411 }
412 }
413
414
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr1)415 void Simulator::ld2(VectorFormat vform,
416 LogicVRegister dst1,
417 LogicVRegister dst2,
418 uint64_t addr1) {
419 dst1.ClearForWrite(vform);
420 dst2.ClearForWrite(vform);
421 int esize = LaneSizeInBytesFromFormat(vform);
422 uint64_t addr2 = addr1 + esize;
423 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
424 dst1.ReadUintFromMem(vform, i, addr1);
425 dst2.ReadUintFromMem(vform, i, addr2);
426 addr1 += 2 * esize;
427 addr2 += 2 * esize;
428 }
429 }
430
431
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,int index,uint64_t addr1)432 void Simulator::ld2(VectorFormat vform,
433 LogicVRegister dst1,
434 LogicVRegister dst2,
435 int index,
436 uint64_t addr1) {
437 dst1.ClearForWrite(vform);
438 dst2.ClearForWrite(vform);
439 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
440 dst1.ReadUintFromMem(vform, index, addr1);
441 dst2.ReadUintFromMem(vform, index, addr2);
442 }
443
444
ld2r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr)445 void Simulator::ld2r(VectorFormat vform,
446 LogicVRegister dst1,
447 LogicVRegister dst2,
448 uint64_t addr) {
449 dst1.ClearForWrite(vform);
450 dst2.ClearForWrite(vform);
451 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
452 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
453 dst1.ReadUintFromMem(vform, i, addr);
454 dst2.ReadUintFromMem(vform, i, addr2);
455 }
456 }
457
458
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr1)459 void Simulator::ld3(VectorFormat vform,
460 LogicVRegister dst1,
461 LogicVRegister dst2,
462 LogicVRegister dst3,
463 uint64_t addr1) {
464 dst1.ClearForWrite(vform);
465 dst2.ClearForWrite(vform);
466 dst3.ClearForWrite(vform);
467 int esize = LaneSizeInBytesFromFormat(vform);
468 uint64_t addr2 = addr1 + esize;
469 uint64_t addr3 = addr2 + esize;
470 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
471 dst1.ReadUintFromMem(vform, i, addr1);
472 dst2.ReadUintFromMem(vform, i, addr2);
473 dst3.ReadUintFromMem(vform, i, addr3);
474 addr1 += 3 * esize;
475 addr2 += 3 * esize;
476 addr3 += 3 * esize;
477 }
478 }
479
480
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr1)481 void Simulator::ld3(VectorFormat vform,
482 LogicVRegister dst1,
483 LogicVRegister dst2,
484 LogicVRegister dst3,
485 int index,
486 uint64_t addr1) {
487 dst1.ClearForWrite(vform);
488 dst2.ClearForWrite(vform);
489 dst3.ClearForWrite(vform);
490 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
491 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
492 dst1.ReadUintFromMem(vform, index, addr1);
493 dst2.ReadUintFromMem(vform, index, addr2);
494 dst3.ReadUintFromMem(vform, index, addr3);
495 }
496
497
ld3r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)498 void Simulator::ld3r(VectorFormat vform,
499 LogicVRegister dst1,
500 LogicVRegister dst2,
501 LogicVRegister dst3,
502 uint64_t addr) {
503 dst1.ClearForWrite(vform);
504 dst2.ClearForWrite(vform);
505 dst3.ClearForWrite(vform);
506 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
507 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
508 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
509 dst1.ReadUintFromMem(vform, i, addr);
510 dst2.ReadUintFromMem(vform, i, addr2);
511 dst3.ReadUintFromMem(vform, i, addr3);
512 }
513 }
514
515
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr1)516 void Simulator::ld4(VectorFormat vform,
517 LogicVRegister dst1,
518 LogicVRegister dst2,
519 LogicVRegister dst3,
520 LogicVRegister dst4,
521 uint64_t addr1) {
522 dst1.ClearForWrite(vform);
523 dst2.ClearForWrite(vform);
524 dst3.ClearForWrite(vform);
525 dst4.ClearForWrite(vform);
526 int esize = LaneSizeInBytesFromFormat(vform);
527 uint64_t addr2 = addr1 + esize;
528 uint64_t addr3 = addr2 + esize;
529 uint64_t addr4 = addr3 + esize;
530 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
531 dst1.ReadUintFromMem(vform, i, addr1);
532 dst2.ReadUintFromMem(vform, i, addr2);
533 dst3.ReadUintFromMem(vform, i, addr3);
534 dst4.ReadUintFromMem(vform, i, addr4);
535 addr1 += 4 * esize;
536 addr2 += 4 * esize;
537 addr3 += 4 * esize;
538 addr4 += 4 * esize;
539 }
540 }
541
542
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr1)543 void Simulator::ld4(VectorFormat vform,
544 LogicVRegister dst1,
545 LogicVRegister dst2,
546 LogicVRegister dst3,
547 LogicVRegister dst4,
548 int index,
549 uint64_t addr1) {
550 dst1.ClearForWrite(vform);
551 dst2.ClearForWrite(vform);
552 dst3.ClearForWrite(vform);
553 dst4.ClearForWrite(vform);
554 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
555 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
556 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
557 dst1.ReadUintFromMem(vform, index, addr1);
558 dst2.ReadUintFromMem(vform, index, addr2);
559 dst3.ReadUintFromMem(vform, index, addr3);
560 dst4.ReadUintFromMem(vform, index, addr4);
561 }
562
563
ld4r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)564 void Simulator::ld4r(VectorFormat vform,
565 LogicVRegister dst1,
566 LogicVRegister dst2,
567 LogicVRegister dst3,
568 LogicVRegister dst4,
569 uint64_t addr) {
570 dst1.ClearForWrite(vform);
571 dst2.ClearForWrite(vform);
572 dst3.ClearForWrite(vform);
573 dst4.ClearForWrite(vform);
574 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
575 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
576 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
577 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
578 dst1.ReadUintFromMem(vform, i, addr);
579 dst2.ReadUintFromMem(vform, i, addr2);
580 dst3.ReadUintFromMem(vform, i, addr3);
581 dst4.ReadUintFromMem(vform, i, addr4);
582 }
583 }
584
585
st1(VectorFormat vform,LogicVRegister src,uint64_t addr)586 void Simulator::st1(VectorFormat vform,
587 LogicVRegister src,
588 uint64_t addr) {
589 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
590 src.WriteUintToMem(vform, i, addr);
591 addr += LaneSizeInBytesFromFormat(vform);
592 }
593 }
594
595
st1(VectorFormat vform,LogicVRegister src,int index,uint64_t addr)596 void Simulator::st1(VectorFormat vform,
597 LogicVRegister src,
598 int index,
599 uint64_t addr) {
600 src.WriteUintToMem(vform, index, addr);
601 }
602
603
st2(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,uint64_t addr)604 void Simulator::st2(VectorFormat vform,
605 LogicVRegister dst,
606 LogicVRegister dst2,
607 uint64_t addr) {
608 int esize = LaneSizeInBytesFromFormat(vform);
609 uint64_t addr2 = addr + esize;
610 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
611 dst.WriteUintToMem(vform, i, addr);
612 dst2.WriteUintToMem(vform, i, addr2);
613 addr += 2 * esize;
614 addr2 += 2 * esize;
615 }
616 }
617
618
st2(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,int index,uint64_t addr)619 void Simulator::st2(VectorFormat vform,
620 LogicVRegister dst,
621 LogicVRegister dst2,
622 int index,
623 uint64_t addr) {
624 int esize = LaneSizeInBytesFromFormat(vform);
625 dst.WriteUintToMem(vform, index, addr);
626 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
627 }
628
629
st3(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)630 void Simulator::st3(VectorFormat vform,
631 LogicVRegister dst,
632 LogicVRegister dst2,
633 LogicVRegister dst3,
634 uint64_t addr) {
635 int esize = LaneSizeInBytesFromFormat(vform);
636 uint64_t addr2 = addr + esize;
637 uint64_t addr3 = addr2 + esize;
638 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
639 dst.WriteUintToMem(vform, i, addr);
640 dst2.WriteUintToMem(vform, i, addr2);
641 dst3.WriteUintToMem(vform, i, addr3);
642 addr += 3 * esize;
643 addr2 += 3 * esize;
644 addr3 += 3 * esize;
645 }
646 }
647
648
st3(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr)649 void Simulator::st3(VectorFormat vform,
650 LogicVRegister dst,
651 LogicVRegister dst2,
652 LogicVRegister dst3,
653 int index,
654 uint64_t addr) {
655 int esize = LaneSizeInBytesFromFormat(vform);
656 dst.WriteUintToMem(vform, index, addr);
657 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
658 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
659 }
660
661
st4(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)662 void Simulator::st4(VectorFormat vform,
663 LogicVRegister dst,
664 LogicVRegister dst2,
665 LogicVRegister dst3,
666 LogicVRegister dst4,
667 uint64_t addr) {
668 int esize = LaneSizeInBytesFromFormat(vform);
669 uint64_t addr2 = addr + esize;
670 uint64_t addr3 = addr2 + esize;
671 uint64_t addr4 = addr3 + esize;
672 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
673 dst.WriteUintToMem(vform, i, addr);
674 dst2.WriteUintToMem(vform, i, addr2);
675 dst3.WriteUintToMem(vform, i, addr3);
676 dst4.WriteUintToMem(vform, i, addr4);
677 addr += 4 * esize;
678 addr2 += 4 * esize;
679 addr3 += 4 * esize;
680 addr4 += 4 * esize;
681 }
682 }
683
684
st4(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr)685 void Simulator::st4(VectorFormat vform,
686 LogicVRegister dst,
687 LogicVRegister dst2,
688 LogicVRegister dst3,
689 LogicVRegister dst4,
690 int index,
691 uint64_t addr) {
692 int esize = LaneSizeInBytesFromFormat(vform);
693 dst.WriteUintToMem(vform, index, addr);
694 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
695 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
696 dst4.WriteUintToMem(vform, index, addr + 3 * esize);
697 }
698
699
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)700 LogicVRegister Simulator::cmp(VectorFormat vform,
701 LogicVRegister dst,
702 const LogicVRegister& src1,
703 const LogicVRegister& src2,
704 Condition cond) {
705 dst.ClearForWrite(vform);
706 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
707 int64_t sa = src1.Int(vform, i);
708 int64_t sb = src2.Int(vform, i);
709 uint64_t ua = src1.Uint(vform, i);
710 uint64_t ub = src2.Uint(vform, i);
711 bool result = false;
712 switch (cond) {
713 case eq: result = (ua == ub); break;
714 case ge: result = (sa >= sb); break;
715 case gt: result = (sa > sb) ; break;
716 case hi: result = (ua > ub) ; break;
717 case hs: result = (ua >= ub); break;
718 case lt: result = (sa < sb) ; break;
719 case le: result = (sa <= sb); break;
720 default: VIXL_UNREACHABLE(); break;
721 }
722 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
723 }
724 return dst;
725 }
726
727
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int imm,Condition cond)728 LogicVRegister Simulator::cmp(VectorFormat vform,
729 LogicVRegister dst,
730 const LogicVRegister& src1,
731 int imm,
732 Condition cond) {
733 SimVRegister temp;
734 LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
735 return cmp(vform, dst, src1, imm_reg, cond);
736 }
737
738
cmptst(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)739 LogicVRegister Simulator::cmptst(VectorFormat vform,
740 LogicVRegister dst,
741 const LogicVRegister& src1,
742 const LogicVRegister& src2) {
743 dst.ClearForWrite(vform);
744 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
745 uint64_t ua = src1.Uint(vform, i);
746 uint64_t ub = src2.Uint(vform, i);
747 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
748 }
749 return dst;
750 }
751
752
add(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)753 LogicVRegister Simulator::add(VectorFormat vform,
754 LogicVRegister dst,
755 const LogicVRegister& src1,
756 const LogicVRegister& src2) {
757 dst.ClearForWrite(vform);
758 // TODO(all): consider assigning the result of LaneCountFromFormat to a local.
759 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
760 // Test for unsigned saturation.
761 uint64_t ua = src1.UintLeftJustified(vform, i);
762 uint64_t ub = src2.UintLeftJustified(vform, i);
763 uint64_t ur = ua + ub;
764 if (ur < ua) {
765 dst.SetUnsignedSat(i, true);
766 }
767
768 // Test for signed saturation.
769 int64_t sa = src1.IntLeftJustified(vform, i);
770 int64_t sb = src2.IntLeftJustified(vform, i);
771 int64_t sr = sa + sb;
772 // If the signs of the operands are the same, but different from the result,
773 // there was an overflow.
774 if (((sa >= 0) == (sb >= 0)) && ((sa >= 0) != (sr >= 0))) {
775 dst.SetSignedSat(i, sa >= 0);
776 }
777
778 dst.SetInt(vform, i, src1.Int(vform, i) + src2.Int(vform, i));
779 }
780 return dst;
781 }
782
783
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)784 LogicVRegister Simulator::addp(VectorFormat vform,
785 LogicVRegister dst,
786 const LogicVRegister& src1,
787 const LogicVRegister& src2) {
788 SimVRegister temp1, temp2;
789 uzp1(vform, temp1, src1, src2);
790 uzp2(vform, temp2, src1, src2);
791 add(vform, dst, temp1, temp2);
792 return dst;
793 }
794
795
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)796 LogicVRegister Simulator::mla(VectorFormat vform,
797 LogicVRegister dst,
798 const LogicVRegister& src1,
799 const LogicVRegister& src2) {
800 SimVRegister temp;
801 mul(vform, temp, src1, src2);
802 add(vform, dst, dst, temp);
803 return dst;
804 }
805
806
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)807 LogicVRegister Simulator::mls(VectorFormat vform,
808 LogicVRegister dst,
809 const LogicVRegister& src1,
810 const LogicVRegister& src2) {
811 SimVRegister temp;
812 mul(vform, temp, src1, src2);
813 sub(vform, dst, dst, temp);
814 return dst;
815 }
816
817
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)818 LogicVRegister Simulator::mul(VectorFormat vform,
819 LogicVRegister dst,
820 const LogicVRegister& src1,
821 const LogicVRegister& src2) {
822 dst.ClearForWrite(vform);
823 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
824 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
825 }
826 return dst;
827 }
828
829
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)830 LogicVRegister Simulator::mul(VectorFormat vform,
831 LogicVRegister dst,
832 const LogicVRegister& src1,
833 const LogicVRegister& src2,
834 int index) {
835 SimVRegister temp;
836 VectorFormat indexform = VectorFormatFillQ(vform);
837 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
838 }
839
840
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)841 LogicVRegister Simulator::mla(VectorFormat vform,
842 LogicVRegister dst,
843 const LogicVRegister& src1,
844 const LogicVRegister& src2,
845 int index) {
846 SimVRegister temp;
847 VectorFormat indexform = VectorFormatFillQ(vform);
848 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
849 }
850
851
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)852 LogicVRegister Simulator::mls(VectorFormat vform,
853 LogicVRegister dst,
854 const LogicVRegister& src1,
855 const LogicVRegister& src2,
856 int index) {
857 SimVRegister temp;
858 VectorFormat indexform = VectorFormatFillQ(vform);
859 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
860 }
861
862
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)863 LogicVRegister Simulator::smull(VectorFormat vform,
864 LogicVRegister dst,
865 const LogicVRegister& src1,
866 const LogicVRegister& src2,
867 int index) {
868 SimVRegister temp;
869 VectorFormat indexform =
870 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
871 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
872 }
873
874
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)875 LogicVRegister Simulator::smull2(VectorFormat vform,
876 LogicVRegister dst,
877 const LogicVRegister& src1,
878 const LogicVRegister& src2,
879 int index) {
880 SimVRegister temp;
881 VectorFormat indexform =
882 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
883 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
884 }
885
886
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)887 LogicVRegister Simulator::umull(VectorFormat vform,
888 LogicVRegister dst,
889 const LogicVRegister& src1,
890 const LogicVRegister& src2,
891 int index) {
892 SimVRegister temp;
893 VectorFormat indexform =
894 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
895 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
896 }
897
898
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)899 LogicVRegister Simulator::umull2(VectorFormat vform,
900 LogicVRegister dst,
901 const LogicVRegister& src1,
902 const LogicVRegister& src2,
903 int index) {
904 SimVRegister temp;
905 VectorFormat indexform =
906 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
907 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
908 }
909
910
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)911 LogicVRegister Simulator::smlal(VectorFormat vform,
912 LogicVRegister dst,
913 const LogicVRegister& src1,
914 const LogicVRegister& src2,
915 int index) {
916 SimVRegister temp;
917 VectorFormat indexform =
918 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
919 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
920 }
921
922
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)923 LogicVRegister Simulator::smlal2(VectorFormat vform,
924 LogicVRegister dst,
925 const LogicVRegister& src1,
926 const LogicVRegister& src2,
927 int index) {
928 SimVRegister temp;
929 VectorFormat indexform =
930 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
931 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
932 }
933
934
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)935 LogicVRegister Simulator::umlal(VectorFormat vform,
936 LogicVRegister dst,
937 const LogicVRegister& src1,
938 const LogicVRegister& src2,
939 int index) {
940 SimVRegister temp;
941 VectorFormat indexform =
942 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
943 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
944 }
945
946
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)947 LogicVRegister Simulator::umlal2(VectorFormat vform,
948 LogicVRegister dst,
949 const LogicVRegister& src1,
950 const LogicVRegister& src2,
951 int index) {
952 SimVRegister temp;
953 VectorFormat indexform =
954 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
955 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
956 }
957
958
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)959 LogicVRegister Simulator::smlsl(VectorFormat vform,
960 LogicVRegister dst,
961 const LogicVRegister& src1,
962 const LogicVRegister& src2,
963 int index) {
964 SimVRegister temp;
965 VectorFormat indexform =
966 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
967 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
968 }
969
970
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)971 LogicVRegister Simulator::smlsl2(VectorFormat vform,
972 LogicVRegister dst,
973 const LogicVRegister& src1,
974 const LogicVRegister& src2,
975 int index) {
976 SimVRegister temp;
977 VectorFormat indexform =
978 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
979 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
980 }
981
982
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)983 LogicVRegister Simulator::umlsl(VectorFormat vform,
984 LogicVRegister dst,
985 const LogicVRegister& src1,
986 const LogicVRegister& src2,
987 int index) {
988 SimVRegister temp;
989 VectorFormat indexform =
990 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
991 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
992 }
993
994
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)995 LogicVRegister Simulator::umlsl2(VectorFormat vform,
996 LogicVRegister dst,
997 const LogicVRegister& src1,
998 const LogicVRegister& src2,
999 int index) {
1000 SimVRegister temp;
1001 VectorFormat indexform =
1002 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1003 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1004 }
1005
1006
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1007 LogicVRegister Simulator::sqdmull(VectorFormat vform,
1008 LogicVRegister dst,
1009 const LogicVRegister& src1,
1010 const LogicVRegister& src2,
1011 int index) {
1012 SimVRegister temp;
1013 VectorFormat indexform =
1014 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1015 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
1016 }
1017
1018
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1019 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
1020 LogicVRegister dst,
1021 const LogicVRegister& src1,
1022 const LogicVRegister& src2,
1023 int index) {
1024 SimVRegister temp;
1025 VectorFormat indexform =
1026 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1027 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1028 }
1029
1030
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1031 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
1032 LogicVRegister dst,
1033 const LogicVRegister& src1,
1034 const LogicVRegister& src2,
1035 int index) {
1036 SimVRegister temp;
1037 VectorFormat indexform =
1038 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1039 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
1040 }
1041
1042
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1043 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
1044 LogicVRegister dst,
1045 const LogicVRegister& src1,
1046 const LogicVRegister& src2,
1047 int index) {
1048 SimVRegister temp;
1049 VectorFormat indexform =
1050 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1051 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1052 }
1053
1054
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1055 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
1056 LogicVRegister dst,
1057 const LogicVRegister& src1,
1058 const LogicVRegister& src2,
1059 int index) {
1060 SimVRegister temp;
1061 VectorFormat indexform =
1062 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1063 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
1064 }
1065
1066
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1067 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
1068 LogicVRegister dst,
1069 const LogicVRegister& src1,
1070 const LogicVRegister& src2,
1071 int index) {
1072 SimVRegister temp;
1073 VectorFormat indexform =
1074 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1075 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1076 }
1077
1078
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1079 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
1080 LogicVRegister dst,
1081 const LogicVRegister& src1,
1082 const LogicVRegister& src2,
1083 int index) {
1084 SimVRegister temp;
1085 VectorFormat indexform = VectorFormatFillQ(vform);
1086 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1087 }
1088
1089
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1090 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
1091 LogicVRegister dst,
1092 const LogicVRegister& src1,
1093 const LogicVRegister& src2,
1094 int index) {
1095 SimVRegister temp;
1096 VectorFormat indexform = VectorFormatFillQ(vform);
1097 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1098 }
1099
1100
PolynomialMult(uint8_t op1,uint8_t op2)1101 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {
1102 uint16_t result = 0;
1103 uint16_t extended_op2 = op2;
1104 for (int i = 0; i < 8; ++i) {
1105 if ((op1 >> i) & 1) {
1106 result = result ^ (extended_op2 << i);
1107 }
1108 }
1109 return result;
1110 }
1111
1112
pmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1113 LogicVRegister Simulator::pmul(VectorFormat vform,
1114 LogicVRegister dst,
1115 const LogicVRegister& src1,
1116 const LogicVRegister& src2) {
1117 dst.ClearForWrite(vform);
1118 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1119 dst.SetUint(vform, i,
1120 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
1121 }
1122 return dst;
1123 }
1124
1125
pmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1126 LogicVRegister Simulator::pmull(VectorFormat vform,
1127 LogicVRegister dst,
1128 const LogicVRegister& src1,
1129 const LogicVRegister& src2) {
1130 VectorFormat vform_src = VectorFormatHalfWidth(vform);
1131 dst.ClearForWrite(vform);
1132 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1133 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, i),
1134 src2.Uint(vform_src, i)));
1135 }
1136 return dst;
1137 }
1138
1139
pmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1140 LogicVRegister Simulator::pmull2(VectorFormat vform,
1141 LogicVRegister dst,
1142 const LogicVRegister& src1,
1143 const LogicVRegister& src2) {
1144 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
1145 dst.ClearForWrite(vform);
1146 int lane_count = LaneCountFromFormat(vform);
1147 for (int i = 0; i < lane_count; i++) {
1148 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, lane_count + i),
1149 src2.Uint(vform_src, lane_count + i)));
1150 }
1151 return dst;
1152 }
1153
1154
sub(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1155 LogicVRegister Simulator::sub(VectorFormat vform,
1156 LogicVRegister dst,
1157 const LogicVRegister& src1,
1158 const LogicVRegister& src2) {
1159 dst.ClearForWrite(vform);
1160 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1161 // Test for unsigned saturation.
1162 if (src2.Uint(vform, i) > src1.Uint(vform, i)) {
1163 dst.SetUnsignedSat(i, false);
1164 }
1165
1166 // Test for signed saturation.
1167 int64_t sa = src1.IntLeftJustified(vform, i);
1168 int64_t sb = src2.IntLeftJustified(vform, i);
1169 int64_t sr = sa - sb;
1170 // If the signs of the operands are different, and the sign of the first
1171 // operand doesn't match the result, there was an overflow.
1172 if (((sa >= 0) != (sb >= 0)) && ((sa >= 0) != (sr >= 0))) {
1173 dst.SetSignedSat(i, sr < 0);
1174 }
1175
1176 dst.SetInt(vform, i, src1.Int(vform, i) - src2.Int(vform, i));
1177 }
1178 return dst;
1179 }
1180
1181
and_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1182 LogicVRegister Simulator::and_(VectorFormat vform,
1183 LogicVRegister dst,
1184 const LogicVRegister& src1,
1185 const LogicVRegister& src2) {
1186 dst.ClearForWrite(vform);
1187 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1188 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1189 }
1190 return dst;
1191 }
1192
1193
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1194 LogicVRegister Simulator::orr(VectorFormat vform,
1195 LogicVRegister dst,
1196 const LogicVRegister& src1,
1197 const LogicVRegister& src2) {
1198 dst.ClearForWrite(vform);
1199 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1200 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1201 }
1202 return dst;
1203 }
1204
1205
orn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1206 LogicVRegister Simulator::orn(VectorFormat vform,
1207 LogicVRegister dst,
1208 const LogicVRegister& src1,
1209 const LogicVRegister& src2) {
1210 dst.ClearForWrite(vform);
1211 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1212 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1213 }
1214 return dst;
1215 }
1216
1217
eor(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1218 LogicVRegister Simulator::eor(VectorFormat vform,
1219 LogicVRegister dst,
1220 const LogicVRegister& src1,
1221 const LogicVRegister& src2) {
1222 dst.ClearForWrite(vform);
1223 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1224 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1225 }
1226 return dst;
1227 }
1228
1229
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1230 LogicVRegister Simulator::bic(VectorFormat vform,
1231 LogicVRegister dst,
1232 const LogicVRegister& src1,
1233 const LogicVRegister& src2) {
1234 dst.ClearForWrite(vform);
1235 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1236 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1237 }
1238 return dst;
1239 }
1240
1241
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)1242 LogicVRegister Simulator::bic(VectorFormat vform,
1243 LogicVRegister dst,
1244 const LogicVRegister& src,
1245 uint64_t imm) {
1246 uint64_t result[16];
1247 int laneCount = LaneCountFromFormat(vform);
1248 for (int i = 0; i < laneCount; ++i) {
1249 result[i] = src.Uint(vform, i) & ~imm;
1250 }
1251 dst.ClearForWrite(vform);
1252 for (int i = 0; i < laneCount; ++i) {
1253 dst.SetUint(vform, i, result[i]);
1254 }
1255 return dst;
1256 }
1257
1258
bif(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1259 LogicVRegister Simulator::bif(VectorFormat vform,
1260 LogicVRegister dst,
1261 const LogicVRegister& src1,
1262 const LogicVRegister& src2) {
1263 dst.ClearForWrite(vform);
1264 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1265 uint64_t operand1 = dst.Uint(vform, i);
1266 uint64_t operand2 = ~src2.Uint(vform, i);
1267 uint64_t operand3 = src1.Uint(vform, i);
1268 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1269 dst.SetUint(vform, i, result);
1270 }
1271 return dst;
1272 }
1273
1274
bit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1275 LogicVRegister Simulator::bit(VectorFormat vform,
1276 LogicVRegister dst,
1277 const LogicVRegister& src1,
1278 const LogicVRegister& src2) {
1279 dst.ClearForWrite(vform);
1280 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1281 uint64_t operand1 = dst.Uint(vform, i);
1282 uint64_t operand2 = src2.Uint(vform, i);
1283 uint64_t operand3 = src1.Uint(vform, i);
1284 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1285 dst.SetUint(vform, i, result);
1286 }
1287 return dst;
1288 }
1289
1290
bsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1291 LogicVRegister Simulator::bsl(VectorFormat vform,
1292 LogicVRegister dst,
1293 const LogicVRegister& src1,
1294 const LogicVRegister& src2) {
1295 dst.ClearForWrite(vform);
1296 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1297 uint64_t operand1 = src2.Uint(vform, i);
1298 uint64_t operand2 = dst.Uint(vform, i);
1299 uint64_t operand3 = src1.Uint(vform, i);
1300 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1301 dst.SetUint(vform, i, result);
1302 }
1303 return dst;
1304 }
1305
1306
sminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1307 LogicVRegister Simulator::sminmax(VectorFormat vform,
1308 LogicVRegister dst,
1309 const LogicVRegister& src1,
1310 const LogicVRegister& src2,
1311 bool max) {
1312 dst.ClearForWrite(vform);
1313 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1314 int64_t src1_val = src1.Int(vform, i);
1315 int64_t src2_val = src2.Int(vform, i);
1316 int64_t dst_val;
1317 if (max == true) {
1318 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1319 } else {
1320 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1321 }
1322 dst.SetInt(vform, i, dst_val);
1323 }
1324 return dst;
1325 }
1326
1327
smax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1328 LogicVRegister Simulator::smax(VectorFormat vform,
1329 LogicVRegister dst,
1330 const LogicVRegister& src1,
1331 const LogicVRegister& src2) {
1332 return sminmax(vform, dst, src1, src2, true);
1333 }
1334
1335
smin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1336 LogicVRegister Simulator::smin(VectorFormat vform,
1337 LogicVRegister dst,
1338 const LogicVRegister& src1,
1339 const LogicVRegister& src2) {
1340 return sminmax(vform, dst, src1, src2, false);
1341 }
1342
1343
sminmaxp(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,bool max)1344 LogicVRegister Simulator::sminmaxp(VectorFormat vform,
1345 LogicVRegister dst,
1346 int dst_index,
1347 const LogicVRegister& src,
1348 bool max) {
1349 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1350 int64_t src1_val = src.Int(vform, i);
1351 int64_t src2_val = src.Int(vform, i + 1);
1352 int64_t dst_val;
1353 if (max == true) {
1354 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1355 } else {
1356 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1357 }
1358 dst.SetInt(vform, dst_index + (i >> 1), dst_val);
1359 }
1360 return dst;
1361 }
1362
1363
smaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1364 LogicVRegister Simulator::smaxp(VectorFormat vform,
1365 LogicVRegister dst,
1366 const LogicVRegister& src1,
1367 const LogicVRegister& src2) {
1368 dst.ClearForWrite(vform);
1369 sminmaxp(vform, dst, 0, src1, true);
1370 sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true);
1371 return dst;
1372 }
1373
1374
sminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1375 LogicVRegister Simulator::sminp(VectorFormat vform,
1376 LogicVRegister dst,
1377 const LogicVRegister& src1,
1378 const LogicVRegister& src2) {
1379 dst.ClearForWrite(vform);
1380 sminmaxp(vform, dst, 0, src1, false);
1381 sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false);
1382 return dst;
1383 }
1384
1385
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1386 LogicVRegister Simulator::addp(VectorFormat vform,
1387 LogicVRegister dst,
1388 const LogicVRegister& src) {
1389 VIXL_ASSERT(vform == kFormatD);
1390
1391 int64_t dst_val = src.Int(kFormat2D, 0) + src.Int(kFormat2D, 1);
1392 dst.ClearForWrite(vform);
1393 dst.SetInt(vform, 0, dst_val);
1394 return dst;
1395 }
1396
1397
addv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1398 LogicVRegister Simulator::addv(VectorFormat vform,
1399 LogicVRegister dst,
1400 const LogicVRegister& src) {
1401 VectorFormat vform_dst
1402 = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1403
1404
1405 int64_t dst_val = 0;
1406 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1407 dst_val += src.Int(vform, i);
1408 }
1409
1410 dst.ClearForWrite(vform_dst);
1411 dst.SetInt(vform_dst, 0, dst_val);
1412 return dst;
1413 }
1414
1415
saddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1416 LogicVRegister Simulator::saddlv(VectorFormat vform,
1417 LogicVRegister dst,
1418 const LogicVRegister& src) {
1419 VectorFormat vform_dst
1420 = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1421
1422 int64_t dst_val = 0;
1423 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1424 dst_val += src.Int(vform, i);
1425 }
1426
1427 dst.ClearForWrite(vform_dst);
1428 dst.SetInt(vform_dst, 0, dst_val);
1429 return dst;
1430 }
1431
1432
uaddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1433 LogicVRegister Simulator::uaddlv(VectorFormat vform,
1434 LogicVRegister dst,
1435 const LogicVRegister& src) {
1436 VectorFormat vform_dst
1437 = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1438
1439 uint64_t dst_val = 0;
1440 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1441 dst_val += src.Uint(vform, i);
1442 }
1443
1444 dst.ClearForWrite(vform_dst);
1445 dst.SetUint(vform_dst, 0, dst_val);
1446 return dst;
1447 }
1448
1449
sminmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool max)1450 LogicVRegister Simulator::sminmaxv(VectorFormat vform,
1451 LogicVRegister dst,
1452 const LogicVRegister& src,
1453 bool max) {
1454 dst.ClearForWrite(vform);
1455 int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1456 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1457 dst.SetInt(vform, i, 0);
1458 int64_t src_val = src.Int(vform, i);
1459 if (max == true) {
1460 dst_val = (src_val > dst_val) ? src_val : dst_val;
1461 } else {
1462 dst_val = (src_val < dst_val) ? src_val : dst_val;
1463 }
1464 }
1465 dst.SetInt(vform, 0, dst_val);
1466 return dst;
1467 }
1468
1469
smaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1470 LogicVRegister Simulator::smaxv(VectorFormat vform,
1471 LogicVRegister dst,
1472 const LogicVRegister& src) {
1473 sminmaxv(vform, dst, src, true);
1474 return dst;
1475 }
1476
1477
sminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1478 LogicVRegister Simulator::sminv(VectorFormat vform,
1479 LogicVRegister dst,
1480 const LogicVRegister& src) {
1481 sminmaxv(vform, dst, src, false);
1482 return dst;
1483 }
1484
1485
uminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1486 LogicVRegister Simulator::uminmax(VectorFormat vform,
1487 LogicVRegister dst,
1488 const LogicVRegister& src1,
1489 const LogicVRegister& src2,
1490 bool max) {
1491 dst.ClearForWrite(vform);
1492 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1493 uint64_t src1_val = src1.Uint(vform, i);
1494 uint64_t src2_val = src2.Uint(vform, i);
1495 uint64_t dst_val;
1496 if (max == true) {
1497 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1498 } else {
1499 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1500 }
1501 dst.SetUint(vform, i, dst_val);
1502 }
1503 return dst;
1504 }
1505
1506
umax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1507 LogicVRegister Simulator::umax(VectorFormat vform,
1508 LogicVRegister dst,
1509 const LogicVRegister& src1,
1510 const LogicVRegister& src2) {
1511 return uminmax(vform, dst, src1, src2, true);
1512 }
1513
1514
umin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1515 LogicVRegister Simulator::umin(VectorFormat vform,
1516 LogicVRegister dst,
1517 const LogicVRegister& src1,
1518 const LogicVRegister& src2) {
1519 return uminmax(vform, dst, src1, src2, false);
1520 }
1521
1522
uminmaxp(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,bool max)1523 LogicVRegister Simulator::uminmaxp(VectorFormat vform,
1524 LogicVRegister dst,
1525 int dst_index,
1526 const LogicVRegister& src,
1527 bool max) {
1528 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1529 uint64_t src1_val = src.Uint(vform, i);
1530 uint64_t src2_val = src.Uint(vform, i + 1);
1531 uint64_t dst_val;
1532 if (max == true) {
1533 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1534 } else {
1535 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1536 }
1537 dst.SetUint(vform, dst_index + (i >> 1), dst_val);
1538 }
1539 return dst;
1540 }
1541
1542
umaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1543 LogicVRegister Simulator::umaxp(VectorFormat vform,
1544 LogicVRegister dst,
1545 const LogicVRegister& src1,
1546 const LogicVRegister& src2) {
1547 dst.ClearForWrite(vform);
1548 uminmaxp(vform, dst, 0, src1, true);
1549 uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true);
1550 return dst;
1551 }
1552
1553
uminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1554 LogicVRegister Simulator::uminp(VectorFormat vform,
1555 LogicVRegister dst,
1556 const LogicVRegister& src1,
1557 const LogicVRegister& src2) {
1558 dst.ClearForWrite(vform);
1559 uminmaxp(vform, dst, 0, src1, false);
1560 uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false);
1561 return dst;
1562 }
1563
1564
uminmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool max)1565 LogicVRegister Simulator::uminmaxv(VectorFormat vform,
1566 LogicVRegister dst,
1567 const LogicVRegister& src,
1568 bool max) {
1569 dst.ClearForWrite(vform);
1570 uint64_t dst_val = max ? 0 : UINT64_MAX;
1571 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1572 dst.SetUint(vform, i, 0);
1573 uint64_t src_val = src.Uint(vform, i);
1574 if (max == true) {
1575 dst_val = (src_val > dst_val) ? src_val : dst_val;
1576 } else {
1577 dst_val = (src_val < dst_val) ? src_val : dst_val;
1578 }
1579 }
1580 dst.SetUint(vform, 0, dst_val);
1581 return dst;
1582 }
1583
1584
umaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1585 LogicVRegister Simulator::umaxv(VectorFormat vform,
1586 LogicVRegister dst,
1587 const LogicVRegister& src) {
1588 uminmaxv(vform, dst, src, true);
1589 return dst;
1590 }
1591
1592
uminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1593 LogicVRegister Simulator::uminv(VectorFormat vform,
1594 LogicVRegister dst,
1595 const LogicVRegister& src) {
1596 uminmaxv(vform, dst, src, false);
1597 return dst;
1598 }
1599
1600
shl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1601 LogicVRegister Simulator::shl(VectorFormat vform,
1602 LogicVRegister dst,
1603 const LogicVRegister& src,
1604 int shift) {
1605 VIXL_ASSERT(shift >= 0);
1606 SimVRegister temp;
1607 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1608 return ushl(vform, dst, src, shiftreg);
1609 }
1610
1611
sshll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1612 LogicVRegister Simulator::sshll(VectorFormat vform,
1613 LogicVRegister dst,
1614 const LogicVRegister& src,
1615 int shift) {
1616 VIXL_ASSERT(shift >= 0);
1617 SimVRegister temp1, temp2;
1618 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1619 LogicVRegister extendedreg = sxtl(vform, temp2, src);
1620 return sshl(vform, dst, extendedreg, shiftreg);
1621 }
1622
1623
sshll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1624 LogicVRegister Simulator::sshll2(VectorFormat vform,
1625 LogicVRegister dst,
1626 const LogicVRegister& src,
1627 int shift) {
1628 VIXL_ASSERT(shift >= 0);
1629 SimVRegister temp1, temp2;
1630 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1631 LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1632 return sshl(vform, dst, extendedreg, shiftreg);
1633 }
1634
1635
shll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1636 LogicVRegister Simulator::shll(VectorFormat vform,
1637 LogicVRegister dst,
1638 const LogicVRegister& src) {
1639 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1640 return sshll(vform, dst, src, shift);
1641 }
1642
1643
shll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1644 LogicVRegister Simulator::shll2(VectorFormat vform,
1645 LogicVRegister dst,
1646 const LogicVRegister& src) {
1647 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1648 return sshll2(vform, dst, src, shift);
1649 }
1650
1651
ushll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1652 LogicVRegister Simulator::ushll(VectorFormat vform,
1653 LogicVRegister dst,
1654 const LogicVRegister& src,
1655 int shift) {
1656 VIXL_ASSERT(shift >= 0);
1657 SimVRegister temp1, temp2;
1658 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1659 LogicVRegister extendedreg = uxtl(vform, temp2, src);
1660 return ushl(vform, dst, extendedreg, shiftreg);
1661 }
1662
1663
ushll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1664 LogicVRegister Simulator::ushll2(VectorFormat vform,
1665 LogicVRegister dst,
1666 const LogicVRegister& src,
1667 int shift) {
1668 VIXL_ASSERT(shift >= 0);
1669 SimVRegister temp1, temp2;
1670 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1671 LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1672 return ushl(vform, dst, extendedreg, shiftreg);
1673 }
1674
1675
sli(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1676 LogicVRegister Simulator::sli(VectorFormat vform,
1677 LogicVRegister dst,
1678 const LogicVRegister& src,
1679 int shift) {
1680 dst.ClearForWrite(vform);
1681 int laneCount = LaneCountFromFormat(vform);
1682 for (int i = 0; i < laneCount; i++) {
1683 uint64_t src_lane = src.Uint(vform, i);
1684 uint64_t dst_lane = dst.Uint(vform, i);
1685 uint64_t shifted = src_lane << shift;
1686 uint64_t mask = MaxUintFromFormat(vform) << shift;
1687 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1688 }
1689 return dst;
1690 }
1691
1692
sqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1693 LogicVRegister Simulator::sqshl(VectorFormat vform,
1694 LogicVRegister dst,
1695 const LogicVRegister& src,
1696 int shift) {
1697 VIXL_ASSERT(shift >= 0);
1698 SimVRegister temp;
1699 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1700 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1701 }
1702
1703
uqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1704 LogicVRegister Simulator::uqshl(VectorFormat vform,
1705 LogicVRegister dst,
1706 const LogicVRegister& src,
1707 int shift) {
1708 VIXL_ASSERT(shift >= 0);
1709 SimVRegister temp;
1710 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1711 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1712 }
1713
1714
sqshlu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1715 LogicVRegister Simulator::sqshlu(VectorFormat vform,
1716 LogicVRegister dst,
1717 const LogicVRegister& src,
1718 int shift) {
1719 VIXL_ASSERT(shift >= 0);
1720 SimVRegister temp;
1721 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1722 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1723 }
1724
1725
sri(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1726 LogicVRegister Simulator::sri(VectorFormat vform,
1727 LogicVRegister dst,
1728 const LogicVRegister& src,
1729 int shift) {
1730 dst.ClearForWrite(vform);
1731 int laneCount = LaneCountFromFormat(vform);
1732 VIXL_ASSERT((shift > 0) &&
1733 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1734 for (int i = 0; i < laneCount; i++) {
1735 uint64_t src_lane = src.Uint(vform, i);
1736 uint64_t dst_lane = dst.Uint(vform, i);
1737 uint64_t shifted;
1738 uint64_t mask;
1739 if (shift == 64) {
1740 shifted = 0;
1741 mask = 0;
1742 } else {
1743 shifted = src_lane >> shift;
1744 mask = MaxUintFromFormat(vform) >> shift;
1745 }
1746 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1747 }
1748 return dst;
1749 }
1750
1751
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1752 LogicVRegister Simulator::ushr(VectorFormat vform,
1753 LogicVRegister dst,
1754 const LogicVRegister& src,
1755 int shift) {
1756 VIXL_ASSERT(shift >= 0);
1757 SimVRegister temp;
1758 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1759 return ushl(vform, dst, src, shiftreg);
1760 }
1761
1762
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1763 LogicVRegister Simulator::sshr(VectorFormat vform,
1764 LogicVRegister dst,
1765 const LogicVRegister& src,
1766 int shift) {
1767 VIXL_ASSERT(shift >= 0);
1768 SimVRegister temp;
1769 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1770 return sshl(vform, dst, src, shiftreg);
1771 }
1772
1773
ssra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1774 LogicVRegister Simulator::ssra(VectorFormat vform,
1775 LogicVRegister dst,
1776 const LogicVRegister& src,
1777 int shift) {
1778 SimVRegister temp;
1779 LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1780 return add(vform, dst, dst, shifted_reg);
1781 }
1782
1783
usra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1784 LogicVRegister Simulator::usra(VectorFormat vform,
1785 LogicVRegister dst,
1786 const LogicVRegister& src,
1787 int shift) {
1788 SimVRegister temp;
1789 LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1790 return add(vform, dst, dst, shifted_reg);
1791 }
1792
1793
srsra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1794 LogicVRegister Simulator::srsra(VectorFormat vform,
1795 LogicVRegister dst,
1796 const LogicVRegister& src,
1797 int shift) {
1798 SimVRegister temp;
1799 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1800 return add(vform, dst, dst, shifted_reg);
1801 }
1802
1803
ursra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1804 LogicVRegister Simulator::ursra(VectorFormat vform,
1805 LogicVRegister dst,
1806 const LogicVRegister& src,
1807 int shift) {
1808 SimVRegister temp;
1809 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1810 return add(vform, dst, dst, shifted_reg);
1811 }
1812
1813
cls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1814 LogicVRegister Simulator::cls(VectorFormat vform,
1815 LogicVRegister dst,
1816 const LogicVRegister& src) {
1817 uint64_t result[16];
1818 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1819 int laneCount = LaneCountFromFormat(vform);
1820 for (int i = 0; i < laneCount; i++) {
1821 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
1822 }
1823
1824 dst.ClearForWrite(vform);
1825 for (int i = 0; i < laneCount; ++i) {
1826 dst.SetUint(vform, i, result[i]);
1827 }
1828 return dst;
1829 }
1830
1831
clz(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1832 LogicVRegister Simulator::clz(VectorFormat vform,
1833 LogicVRegister dst,
1834 const LogicVRegister& src) {
1835 uint64_t result[16];
1836 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1837 int laneCount = LaneCountFromFormat(vform);
1838 for (int i = 0; i < laneCount; i++) {
1839 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
1840 }
1841
1842 dst.ClearForWrite(vform);
1843 for (int i = 0; i < laneCount; ++i) {
1844 dst.SetUint(vform, i, result[i]);
1845 }
1846 return dst;
1847 }
1848
1849
cnt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1850 LogicVRegister Simulator::cnt(VectorFormat vform,
1851 LogicVRegister dst,
1852 const LogicVRegister& src) {
1853 uint64_t result[16];
1854 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1855 int laneCount = LaneCountFromFormat(vform);
1856 for (int i = 0; i < laneCount; i++) {
1857 uint64_t value = src.Uint(vform, i);
1858 result[i] = 0;
1859 for (int j = 0; j < laneSizeInBits; j++) {
1860 result[i] += (value & 1);
1861 value >>= 1;
1862 }
1863 }
1864
1865 dst.ClearForWrite(vform);
1866 for (int i = 0; i < laneCount; ++i) {
1867 dst.SetUint(vform, i, result[i]);
1868 }
1869 return dst;
1870 }
1871
1872
sshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1873 LogicVRegister Simulator::sshl(VectorFormat vform,
1874 LogicVRegister dst,
1875 const LogicVRegister& src1,
1876 const LogicVRegister& src2) {
1877 dst.ClearForWrite(vform);
1878 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1879 int8_t shift_val = src2.Int(vform, i);
1880 int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1881
1882 // Set signed saturation state.
1883 if ((shift_val > CountLeadingSignBits(lj_src_val)) &&
1884 (lj_src_val != 0)) {
1885 dst.SetSignedSat(i, lj_src_val >= 0);
1886 }
1887
1888 // Set unsigned saturation state.
1889 if (lj_src_val < 0) {
1890 dst.SetUnsignedSat(i, false);
1891 } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
1892 (lj_src_val != 0)) {
1893 dst.SetUnsignedSat(i, true);
1894 }
1895
1896 int64_t src_val = src1.Int(vform, i);
1897 if (shift_val > 63) {
1898 dst.SetInt(vform, i, 0);
1899 } else if (shift_val < -63) {
1900 dst.SetRounding(i, src_val < 0);
1901 dst.SetInt(vform, i, (src_val < 0) ? -1 : 0);
1902 } else {
1903 if (shift_val < 0) {
1904 // Set rounding state. Rounding only needed on right shifts.
1905 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1906 dst.SetRounding(i, true);
1907 }
1908 src_val >>= -shift_val;
1909 } else {
1910 src_val <<= shift_val;
1911 }
1912 dst.SetInt(vform, i, src_val);
1913 }
1914 }
1915 return dst;
1916 }
1917
1918
ushl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1919 LogicVRegister Simulator::ushl(VectorFormat vform,
1920 LogicVRegister dst,
1921 const LogicVRegister& src1,
1922 const LogicVRegister& src2) {
1923 dst.ClearForWrite(vform);
1924 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1925 int8_t shift_val = src2.Int(vform, i);
1926 uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1927
1928 // Set saturation state.
1929 if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
1930 dst.SetUnsignedSat(i, true);
1931 }
1932
1933 uint64_t src_val = src1.Uint(vform, i);
1934 if ((shift_val > 63) || (shift_val < -64)) {
1935 dst.SetUint(vform, i, 0);
1936 } else {
1937 if (shift_val < 0) {
1938 // Set rounding state. Rounding only needed on right shifts.
1939 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1940 dst.SetRounding(i, true);
1941 }
1942
1943 if (shift_val == -64) {
1944 src_val = 0;
1945 } else {
1946 src_val >>= -shift_val;
1947 }
1948 } else {
1949 src_val <<= shift_val;
1950 }
1951 dst.SetUint(vform, i, src_val);
1952 }
1953 }
1954 return dst;
1955 }
1956
1957
neg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1958 LogicVRegister Simulator::neg(VectorFormat vform,
1959 LogicVRegister dst,
1960 const LogicVRegister& src) {
1961 dst.ClearForWrite(vform);
1962 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1963 // Test for signed saturation.
1964 int64_t sa = src.Int(vform, i);
1965 if (sa == MinIntFromFormat(vform)) {
1966 dst.SetSignedSat(i, true);
1967 }
1968 dst.SetInt(vform, i, -sa);
1969 }
1970 return dst;
1971 }
1972
1973
suqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1974 LogicVRegister Simulator::suqadd(VectorFormat vform,
1975 LogicVRegister dst,
1976 const LogicVRegister& src) {
1977 dst.ClearForWrite(vform);
1978 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1979 int64_t sa = dst.IntLeftJustified(vform, i);
1980 uint64_t ub = src.UintLeftJustified(vform, i);
1981 int64_t sr = sa + ub;
1982
1983 if (sr < sa) { // Test for signed positive saturation.
1984 dst.SetInt(vform, i, MaxIntFromFormat(vform));
1985 } else {
1986 dst.SetInt(vform, i, dst.Int(vform, i) + src.Int(vform, i));
1987 }
1988 }
1989 return dst;
1990 }
1991
1992
usqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1993 LogicVRegister Simulator::usqadd(VectorFormat vform,
1994 LogicVRegister dst,
1995 const LogicVRegister& src) {
1996 dst.ClearForWrite(vform);
1997 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1998 uint64_t ua = dst.UintLeftJustified(vform, i);
1999 int64_t sb = src.IntLeftJustified(vform, i);
2000 uint64_t ur = ua + sb;
2001
2002 if ((sb > 0) && (ur <= ua)) {
2003 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.
2004 } else if ((sb < 0) && (ur >= ua)) {
2005 dst.SetUint(vform, i, 0); // Negative saturation.
2006 } else {
2007 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
2008 }
2009 }
2010 return dst;
2011 }
2012
2013
abs(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2014 LogicVRegister Simulator::abs(VectorFormat vform,
2015 LogicVRegister dst,
2016 const LogicVRegister& src) {
2017 dst.ClearForWrite(vform);
2018 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2019 // Test for signed saturation.
2020 int64_t sa = src.Int(vform, i);
2021 if (sa == MinIntFromFormat(vform)) {
2022 dst.SetSignedSat(i, true);
2023 }
2024 if (sa < 0) {
2025 dst.SetInt(vform, i, -sa);
2026 } else {
2027 dst.SetInt(vform, i, sa);
2028 }
2029 }
2030 return dst;
2031 }
2032
2033
extractnarrow(VectorFormat dstform,LogicVRegister dst,bool dstIsSigned,const LogicVRegister & src,bool srcIsSigned)2034 LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
2035 LogicVRegister dst,
2036 bool dstIsSigned,
2037 const LogicVRegister& src,
2038 bool srcIsSigned) {
2039 bool upperhalf = false;
2040 VectorFormat srcform = kFormatUndefined;
2041 int64_t ssrc[8];
2042 uint64_t usrc[8];
2043
2044 switch (dstform) {
2045 case kFormat8B : upperhalf = false; srcform = kFormat8H; break;
2046 case kFormat16B: upperhalf = true; srcform = kFormat8H; break;
2047 case kFormat4H : upperhalf = false; srcform = kFormat4S; break;
2048 case kFormat8H : upperhalf = true; srcform = kFormat4S; break;
2049 case kFormat2S : upperhalf = false; srcform = kFormat2D; break;
2050 case kFormat4S : upperhalf = true; srcform = kFormat2D; break;
2051 case kFormatB : upperhalf = false; srcform = kFormatH; break;
2052 case kFormatH : upperhalf = false; srcform = kFormatS; break;
2053 case kFormatS : upperhalf = false; srcform = kFormatD; break;
2054 default:VIXL_UNIMPLEMENTED();
2055 }
2056
2057 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2058 ssrc[i] = src.Int(srcform, i);
2059 usrc[i] = src.Uint(srcform, i);
2060 }
2061
2062 int offset;
2063 if (upperhalf) {
2064 offset = LaneCountFromFormat(dstform) / 2;
2065 } else {
2066 offset = 0;
2067 dst.ClearForWrite(dstform);
2068 }
2069
2070 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2071 // Test for signed saturation
2072 if (ssrc[i] > MaxIntFromFormat(dstform)) {
2073 dst.SetSignedSat(offset + i, true);
2074 } else if (ssrc[i] < MinIntFromFormat(dstform)) {
2075 dst.SetSignedSat(offset + i, false);
2076 }
2077
2078 // Test for unsigned saturation
2079 if (srcIsSigned) {
2080 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
2081 dst.SetUnsignedSat(offset + i, true);
2082 } else if (ssrc[i] < 0) {
2083 dst.SetUnsignedSat(offset + i, false);
2084 }
2085 } else {
2086 if (usrc[i] > MaxUintFromFormat(dstform)) {
2087 dst.SetUnsignedSat(offset + i, true);
2088 }
2089 }
2090
2091 int64_t result;
2092 if (srcIsSigned) {
2093 result = ssrc[i] & MaxUintFromFormat(dstform);
2094 } else {
2095 result = usrc[i] & MaxUintFromFormat(dstform);
2096 }
2097
2098 if (dstIsSigned) {
2099 dst.SetInt(dstform, offset + i, result);
2100 } else {
2101 dst.SetUint(dstform, offset + i, result);
2102 }
2103 }
2104 return dst;
2105 }
2106
2107
xtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2108 LogicVRegister Simulator::xtn(VectorFormat vform,
2109 LogicVRegister dst,
2110 const LogicVRegister& src) {
2111 return extractnarrow(vform, dst, true, src, true);
2112 }
2113
2114
sqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2115 LogicVRegister Simulator::sqxtn(VectorFormat vform,
2116 LogicVRegister dst,
2117 const LogicVRegister& src) {
2118 return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
2119 }
2120
2121
sqxtun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2122 LogicVRegister Simulator::sqxtun(VectorFormat vform,
2123 LogicVRegister dst,
2124 const LogicVRegister& src) {
2125 return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
2126 }
2127
2128
uqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2129 LogicVRegister Simulator::uqxtn(VectorFormat vform,
2130 LogicVRegister dst,
2131 const LogicVRegister& src) {
2132 return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
2133 }
2134
2135
absdiff(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool issigned)2136 LogicVRegister Simulator::absdiff(VectorFormat vform,
2137 LogicVRegister dst,
2138 const LogicVRegister& src1,
2139 const LogicVRegister& src2,
2140 bool issigned) {
2141 dst.ClearForWrite(vform);
2142 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2143 if (issigned) {
2144 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
2145 sr = sr > 0 ? sr : -sr;
2146 dst.SetInt(vform, i, sr);
2147 } else {
2148 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
2149 sr = sr > 0 ? sr : -sr;
2150 dst.SetUint(vform, i, sr);
2151 }
2152 }
2153 return dst;
2154 }
2155
2156
saba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2157 LogicVRegister Simulator::saba(VectorFormat vform,
2158 LogicVRegister dst,
2159 const LogicVRegister& src1,
2160 const LogicVRegister& src2) {
2161 SimVRegister temp;
2162 dst.ClearForWrite(vform);
2163 absdiff(vform, temp, src1, src2, true);
2164 add(vform, dst, dst, temp);
2165 return dst;
2166 }
2167
2168
uaba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2169 LogicVRegister Simulator::uaba(VectorFormat vform,
2170 LogicVRegister dst,
2171 const LogicVRegister& src1,
2172 const LogicVRegister& src2) {
2173 SimVRegister temp;
2174 dst.ClearForWrite(vform);
2175 absdiff(vform, temp, src1, src2, false);
2176 add(vform, dst, dst, temp);
2177 return dst;
2178 }
2179
2180
not_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2181 LogicVRegister Simulator::not_(VectorFormat vform,
2182 LogicVRegister dst,
2183 const LogicVRegister& src) {
2184 dst.ClearForWrite(vform);
2185 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2186 dst.SetUint(vform, i, ~src.Uint(vform, i));
2187 }
2188 return dst;
2189 }
2190
2191
rbit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2192 LogicVRegister Simulator::rbit(VectorFormat vform,
2193 LogicVRegister dst,
2194 const LogicVRegister& src) {
2195 uint64_t result[16];
2196 int laneCount = LaneCountFromFormat(vform);
2197 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
2198 uint64_t reversed_value;
2199 uint64_t value;
2200 for (int i = 0; i < laneCount; i++) {
2201 value = src.Uint(vform, i);
2202 reversed_value = 0;
2203 for (int j = 0; j < laneSizeInBits; j++) {
2204 reversed_value = (reversed_value << 1) | (value & 1);
2205 value >>= 1;
2206 }
2207 result[i] = reversed_value;
2208 }
2209
2210 dst.ClearForWrite(vform);
2211 for (int i = 0; i < laneCount; ++i) {
2212 dst.SetUint(vform, i, result[i]);
2213 }
2214 return dst;
2215 }
2216
2217
rev(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int revSize)2218 LogicVRegister Simulator::rev(VectorFormat vform,
2219 LogicVRegister dst,
2220 const LogicVRegister& src,
2221 int revSize) {
2222 uint64_t result[16];
2223 int laneCount = LaneCountFromFormat(vform);
2224 int laneSize = LaneSizeInBytesFromFormat(vform);
2225 int lanesPerLoop = revSize / laneSize;
2226 for (int i = 0; i < laneCount; i += lanesPerLoop) {
2227 for (int j = 0; j < lanesPerLoop; j++) {
2228 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
2229 }
2230 }
2231 dst.ClearForWrite(vform);
2232 for (int i = 0; i < laneCount; ++i) {
2233 dst.SetUint(vform, i, result[i]);
2234 }
2235 return dst;
2236 }
2237
2238
rev16(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2239 LogicVRegister Simulator::rev16(VectorFormat vform,
2240 LogicVRegister dst,
2241 const LogicVRegister& src) {
2242 return rev(vform, dst, src, 2);
2243 }
2244
2245
rev32(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2246 LogicVRegister Simulator::rev32(VectorFormat vform,
2247 LogicVRegister dst,
2248 const LogicVRegister& src) {
2249 return rev(vform, dst, src, 4);
2250 }
2251
2252
rev64(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2253 LogicVRegister Simulator::rev64(VectorFormat vform,
2254 LogicVRegister dst,
2255 const LogicVRegister& src) {
2256 return rev(vform, dst, src, 8);
2257 }
2258
2259
addlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_signed,bool do_accumulate)2260 LogicVRegister Simulator::addlp(VectorFormat vform,
2261 LogicVRegister dst,
2262 const LogicVRegister& src,
2263 bool is_signed,
2264 bool do_accumulate) {
2265 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
2266
2267 int64_t sr[16];
2268 uint64_t ur[16];
2269
2270 int laneCount = LaneCountFromFormat(vform);
2271 for (int i = 0; i < laneCount; ++i) {
2272 if (is_signed) {
2273 sr[i] = src.Int(vformsrc, 2 * i) + src.Int(vformsrc, 2 * i + 1);
2274 } else {
2275 ur[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
2276 }
2277 }
2278
2279 dst.ClearForWrite(vform);
2280 for (int i = 0; i < laneCount; ++i) {
2281 if (do_accumulate) {
2282 if (is_signed) {
2283 dst.SetInt(vform, i, dst.Int(vform, i) + sr[i]);
2284 } else {
2285 dst.SetUint(vform, i, dst.Uint(vform, i) + ur[i]);
2286 }
2287 } else {
2288 if (is_signed) {
2289 dst.SetInt(vform, i, sr[i]);
2290 } else {
2291 dst.SetUint(vform, i, ur[i]);
2292 }
2293 }
2294 }
2295
2296 return dst;
2297 }
2298
2299
saddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2300 LogicVRegister Simulator::saddlp(VectorFormat vform,
2301 LogicVRegister dst,
2302 const LogicVRegister& src) {
2303 return addlp(vform, dst, src, true, false);
2304 }
2305
2306
uaddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2307 LogicVRegister Simulator::uaddlp(VectorFormat vform,
2308 LogicVRegister dst,
2309 const LogicVRegister& src) {
2310 return addlp(vform, dst, src, false, false);
2311 }
2312
2313
sadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2314 LogicVRegister Simulator::sadalp(VectorFormat vform,
2315 LogicVRegister dst,
2316 const LogicVRegister& src) {
2317 return addlp(vform, dst, src, true, true);
2318 }
2319
2320
uadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2321 LogicVRegister Simulator::uadalp(VectorFormat vform,
2322 LogicVRegister dst,
2323 const LogicVRegister& src) {
2324 return addlp(vform, dst, src, false, true);
2325 }
2326
2327
ext(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)2328 LogicVRegister Simulator::ext(VectorFormat vform,
2329 LogicVRegister dst,
2330 const LogicVRegister& src1,
2331 const LogicVRegister& src2,
2332 int index) {
2333 uint8_t result[16];
2334 int laneCount = LaneCountFromFormat(vform);
2335 for (int i = 0; i < laneCount - index; ++i) {
2336 result[i] = src1.Uint(vform, i + index);
2337 }
2338 for (int i = 0; i < index; ++i) {
2339 result[laneCount - index + i] = src2.Uint(vform, i);
2340 }
2341 dst.ClearForWrite(vform);
2342 for (int i = 0; i < laneCount; ++i) {
2343 dst.SetUint(vform, i, result[i]);
2344 }
2345 return dst;
2346 }
2347
2348
dup_element(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2349 LogicVRegister Simulator::dup_element(VectorFormat vform,
2350 LogicVRegister dst,
2351 const LogicVRegister& src,
2352 int src_index) {
2353 int laneCount = LaneCountFromFormat(vform);
2354 uint64_t value = src.Uint(vform, src_index);
2355 dst.ClearForWrite(vform);
2356 for (int i = 0; i < laneCount; ++i) {
2357 dst.SetUint(vform, i, value);
2358 }
2359 return dst;
2360 }
2361
2362
dup_immediate(VectorFormat vform,LogicVRegister dst,uint64_t imm)2363 LogicVRegister Simulator::dup_immediate(VectorFormat vform,
2364 LogicVRegister dst,
2365 uint64_t imm) {
2366 int laneCount = LaneCountFromFormat(vform);
2367 uint64_t value = imm & MaxUintFromFormat(vform);
2368 dst.ClearForWrite(vform);
2369 for (int i = 0; i < laneCount; ++i) {
2370 dst.SetUint(vform, i, value);
2371 }
2372 return dst;
2373 }
2374
2375
ins_element(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,int src_index)2376 LogicVRegister Simulator::ins_element(VectorFormat vform,
2377 LogicVRegister dst,
2378 int dst_index,
2379 const LogicVRegister& src,
2380 int src_index) {
2381 dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2382 return dst;
2383 }
2384
2385
ins_immediate(VectorFormat vform,LogicVRegister dst,int dst_index,uint64_t imm)2386 LogicVRegister Simulator::ins_immediate(VectorFormat vform,
2387 LogicVRegister dst,
2388 int dst_index,
2389 uint64_t imm) {
2390 uint64_t value = imm & MaxUintFromFormat(vform);
2391 dst.SetUint(vform, dst_index, value);
2392 return dst;
2393 }
2394
2395
movi(VectorFormat vform,LogicVRegister dst,uint64_t imm)2396 LogicVRegister Simulator::movi(VectorFormat vform,
2397 LogicVRegister dst,
2398 uint64_t imm) {
2399 int laneCount = LaneCountFromFormat(vform);
2400 dst.ClearForWrite(vform);
2401 for (int i = 0; i < laneCount; ++i) {
2402 dst.SetUint(vform, i, imm);
2403 }
2404 return dst;
2405 }
2406
2407
mvni(VectorFormat vform,LogicVRegister dst,uint64_t imm)2408 LogicVRegister Simulator::mvni(VectorFormat vform,
2409 LogicVRegister dst,
2410 uint64_t imm) {
2411 int laneCount = LaneCountFromFormat(vform);
2412 dst.ClearForWrite(vform);
2413 for (int i = 0; i < laneCount; ++i) {
2414 dst.SetUint(vform, i, ~imm);
2415 }
2416 return dst;
2417 }
2418
2419
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)2420 LogicVRegister Simulator::orr(VectorFormat vform,
2421 LogicVRegister dst,
2422 const LogicVRegister& src,
2423 uint64_t imm) {
2424 uint64_t result[16];
2425 int laneCount = LaneCountFromFormat(vform);
2426 for (int i = 0; i < laneCount; ++i) {
2427 result[i] = src.Uint(vform, i) | imm;
2428 }
2429 dst.ClearForWrite(vform);
2430 for (int i = 0; i < laneCount; ++i) {
2431 dst.SetUint(vform, i, result[i]);
2432 }
2433 return dst;
2434 }
2435
2436
uxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2437 LogicVRegister Simulator::uxtl(VectorFormat vform,
2438 LogicVRegister dst,
2439 const LogicVRegister& src) {
2440 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2441
2442 dst.ClearForWrite(vform);
2443 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2444 dst.SetUint(vform, i, src.Uint(vform_half, i));
2445 }
2446 return dst;
2447 }
2448
2449
sxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2450 LogicVRegister Simulator::sxtl(VectorFormat vform,
2451 LogicVRegister dst,
2452 const LogicVRegister& src) {
2453 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2454
2455 dst.ClearForWrite(vform);
2456 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2457 dst.SetInt(vform, i, src.Int(vform_half, i));
2458 }
2459 return dst;
2460 }
2461
2462
uxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2463 LogicVRegister Simulator::uxtl2(VectorFormat vform,
2464 LogicVRegister dst,
2465 const LogicVRegister& src) {
2466 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2467 int lane_count = LaneCountFromFormat(vform);
2468
2469 dst.ClearForWrite(vform);
2470 for (int i = 0; i < lane_count; i++) {
2471 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
2472 }
2473 return dst;
2474 }
2475
2476
sxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2477 LogicVRegister Simulator::sxtl2(VectorFormat vform,
2478 LogicVRegister dst,
2479 const LogicVRegister& src) {
2480 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2481 int lane_count = LaneCountFromFormat(vform);
2482
2483 dst.ClearForWrite(vform);
2484 for (int i = 0; i < lane_count; i++) {
2485 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
2486 }
2487 return dst;
2488 }
2489
2490
shrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2491 LogicVRegister Simulator::shrn(VectorFormat vform,
2492 LogicVRegister dst,
2493 const LogicVRegister& src,
2494 int shift) {
2495 SimVRegister temp;
2496 VectorFormat vform_src = VectorFormatDoubleWidth(vform);
2497 VectorFormat vform_dst = vform;
2498 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
2499 return extractnarrow(vform_dst, dst, false, shifted_src, false);
2500 }
2501
2502
shrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2503 LogicVRegister Simulator::shrn2(VectorFormat vform,
2504 LogicVRegister dst,
2505 const LogicVRegister& src,
2506 int shift) {
2507 SimVRegister temp;
2508 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2509 VectorFormat vformdst = vform;
2510 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
2511 return extractnarrow(vformdst, dst, false, shifted_src, false);
2512 }
2513
2514
rshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2515 LogicVRegister Simulator::rshrn(VectorFormat vform,
2516 LogicVRegister dst,
2517 const LogicVRegister& src,
2518 int shift) {
2519 SimVRegister temp;
2520 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2521 VectorFormat vformdst = vform;
2522 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2523 return extractnarrow(vformdst, dst, false, shifted_src, false);
2524 }
2525
2526
rshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2527 LogicVRegister Simulator::rshrn2(VectorFormat vform,
2528 LogicVRegister dst,
2529 const LogicVRegister& src,
2530 int shift) {
2531 SimVRegister temp;
2532 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2533 VectorFormat vformdst = vform;
2534 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2535 return extractnarrow(vformdst, dst, false, shifted_src, false);
2536 }
2537
2538
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)2539 LogicVRegister Simulator::tbl(VectorFormat vform,
2540 LogicVRegister dst,
2541 const LogicVRegister& tab,
2542 const LogicVRegister& ind) {
2543 movi(vform, dst, 0);
2544 return tbx(vform, dst, tab, ind);
2545 }
2546
2547
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)2548 LogicVRegister Simulator::tbl(VectorFormat vform,
2549 LogicVRegister dst,
2550 const LogicVRegister& tab,
2551 const LogicVRegister& tab2,
2552 const LogicVRegister& ind) {
2553 movi(vform, dst, 0);
2554 return tbx(vform, dst, tab, tab2, ind);
2555 }
2556
2557
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)2558 LogicVRegister Simulator::tbl(VectorFormat vform,
2559 LogicVRegister dst,
2560 const LogicVRegister& tab,
2561 const LogicVRegister& tab2,
2562 const LogicVRegister& tab3,
2563 const LogicVRegister& ind) {
2564 movi(vform, dst, 0);
2565 return tbx(vform, dst, tab, tab2, tab3, ind);
2566 }
2567
2568
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)2569 LogicVRegister Simulator::tbl(VectorFormat vform,
2570 LogicVRegister dst,
2571 const LogicVRegister& tab,
2572 const LogicVRegister& tab2,
2573 const LogicVRegister& tab3,
2574 const LogicVRegister& tab4,
2575 const LogicVRegister& ind) {
2576 movi(vform, dst, 0);
2577 return tbx(vform, dst, tab, tab2, tab3, tab4, ind);
2578 }
2579
2580
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)2581 LogicVRegister Simulator::tbx(VectorFormat vform,
2582 LogicVRegister dst,
2583 const LogicVRegister& tab,
2584 const LogicVRegister& ind) {
2585 dst.ClearForWrite(vform);
2586 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2587 unsigned j = ind.Uint(vform, i);
2588 switch (j >> 4) {
2589 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
2590 }
2591 }
2592 return dst;
2593 }
2594
2595
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)2596 LogicVRegister Simulator::tbx(VectorFormat vform,
2597 LogicVRegister dst,
2598 const LogicVRegister& tab,
2599 const LogicVRegister& tab2,
2600 const LogicVRegister& ind) {
2601 dst.ClearForWrite(vform);
2602 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2603 unsigned j = ind.Uint(vform, i);
2604 switch (j >> 4) {
2605 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
2606 case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
2607 }
2608 }
2609 return dst;
2610 }
2611
2612
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)2613 LogicVRegister Simulator::tbx(VectorFormat vform,
2614 LogicVRegister dst,
2615 const LogicVRegister& tab,
2616 const LogicVRegister& tab2,
2617 const LogicVRegister& tab3,
2618 const LogicVRegister& ind) {
2619 dst.ClearForWrite(vform);
2620 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2621 unsigned j = ind.Uint(vform, i);
2622 switch (j >> 4) {
2623 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
2624 case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
2625 case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break;
2626 }
2627 }
2628 return dst;
2629 }
2630
2631
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)2632 LogicVRegister Simulator::tbx(VectorFormat vform,
2633 LogicVRegister dst,
2634 const LogicVRegister& tab,
2635 const LogicVRegister& tab2,
2636 const LogicVRegister& tab3,
2637 const LogicVRegister& tab4,
2638 const LogicVRegister& ind) {
2639 dst.ClearForWrite(vform);
2640 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2641 unsigned j = ind.Uint(vform, i);
2642 switch (j >> 4) {
2643 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
2644 case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
2645 case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break;
2646 case 3: dst.SetUint(vform, i, tab4.Uint(kFormat16B, j & 15)); break;
2647 }
2648 }
2649 return dst;
2650 }
2651
2652
uqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2653 LogicVRegister Simulator::uqshrn(VectorFormat vform,
2654 LogicVRegister dst,
2655 const LogicVRegister& src,
2656 int shift) {
2657 return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
2658 }
2659
2660
uqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2661 LogicVRegister Simulator::uqshrn2(VectorFormat vform,
2662 LogicVRegister dst,
2663 const LogicVRegister& src,
2664 int shift) {
2665 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2666 }
2667
2668
uqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2669 LogicVRegister Simulator::uqrshrn(VectorFormat vform,
2670 LogicVRegister dst,
2671 const LogicVRegister& src,
2672 int shift) {
2673 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
2674 }
2675
2676
uqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2677 LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
2678 LogicVRegister dst,
2679 const LogicVRegister& src,
2680 int shift) {
2681 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2682 }
2683
2684
sqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2685 LogicVRegister Simulator::sqshrn(VectorFormat vform,
2686 LogicVRegister dst,
2687 const LogicVRegister& src,
2688 int shift) {
2689 SimVRegister temp;
2690 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2691 VectorFormat vformdst = vform;
2692 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2693 return sqxtn(vformdst, dst, shifted_src);
2694 }
2695
2696
sqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2697 LogicVRegister Simulator::sqshrn2(VectorFormat vform,
2698 LogicVRegister dst,
2699 const LogicVRegister& src,
2700 int shift) {
2701 SimVRegister temp;
2702 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2703 VectorFormat vformdst = vform;
2704 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2705 return sqxtn(vformdst, dst, shifted_src);
2706 }
2707
2708
sqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2709 LogicVRegister Simulator::sqrshrn(VectorFormat vform,
2710 LogicVRegister dst,
2711 const LogicVRegister& src,
2712 int shift) {
2713 SimVRegister temp;
2714 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2715 VectorFormat vformdst = vform;
2716 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2717 return sqxtn(vformdst, dst, shifted_src);
2718 }
2719
2720
sqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2721 LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
2722 LogicVRegister dst,
2723 const LogicVRegister& src,
2724 int shift) {
2725 SimVRegister temp;
2726 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2727 VectorFormat vformdst = vform;
2728 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2729 return sqxtn(vformdst, dst, shifted_src);
2730 }
2731
2732
sqshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2733 LogicVRegister Simulator::sqshrun(VectorFormat vform,
2734 LogicVRegister dst,
2735 const LogicVRegister& src,
2736 int shift) {
2737 SimVRegister temp;
2738 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2739 VectorFormat vformdst = vform;
2740 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2741 return sqxtun(vformdst, dst, shifted_src);
2742 }
2743
2744
sqshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2745 LogicVRegister Simulator::sqshrun2(VectorFormat vform,
2746 LogicVRegister dst,
2747 const LogicVRegister& src,
2748 int shift) {
2749 SimVRegister temp;
2750 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2751 VectorFormat vformdst = vform;
2752 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2753 return sqxtun(vformdst, dst, shifted_src);
2754 }
2755
2756
sqrshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2757 LogicVRegister Simulator::sqrshrun(VectorFormat vform,
2758 LogicVRegister dst,
2759 const LogicVRegister& src,
2760 int shift) {
2761 SimVRegister temp;
2762 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2763 VectorFormat vformdst = vform;
2764 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2765 return sqxtun(vformdst, dst, shifted_src);
2766 }
2767
2768
sqrshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2769 LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
2770 LogicVRegister dst,
2771 const LogicVRegister& src,
2772 int shift) {
2773 SimVRegister temp;
2774 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2775 VectorFormat vformdst = vform;
2776 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2777 return sqxtun(vformdst, dst, shifted_src);
2778 }
2779
2780
uaddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2781 LogicVRegister Simulator::uaddl(VectorFormat vform,
2782 LogicVRegister dst,
2783 const LogicVRegister& src1,
2784 const LogicVRegister& src2) {
2785 SimVRegister temp1, temp2;
2786 uxtl(vform, temp1, src1);
2787 uxtl(vform, temp2, src2);
2788 add(vform, dst, temp1, temp2);
2789 return dst;
2790 }
2791
2792
uaddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2793 LogicVRegister Simulator::uaddl2(VectorFormat vform,
2794 LogicVRegister dst,
2795 const LogicVRegister& src1,
2796 const LogicVRegister& src2) {
2797 SimVRegister temp1, temp2;
2798 uxtl2(vform, temp1, src1);
2799 uxtl2(vform, temp2, src2);
2800 add(vform, dst, temp1, temp2);
2801 return dst;
2802 }
2803
2804
uaddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2805 LogicVRegister Simulator::uaddw(VectorFormat vform,
2806 LogicVRegister dst,
2807 const LogicVRegister& src1,
2808 const LogicVRegister& src2) {
2809 SimVRegister temp;
2810 uxtl(vform, temp, src2);
2811 add(vform, dst, src1, temp);
2812 return dst;
2813 }
2814
2815
uaddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2816 LogicVRegister Simulator::uaddw2(VectorFormat vform,
2817 LogicVRegister dst,
2818 const LogicVRegister& src1,
2819 const LogicVRegister& src2) {
2820 SimVRegister temp;
2821 uxtl2(vform, temp, src2);
2822 add(vform, dst, src1, temp);
2823 return dst;
2824 }
2825
2826
saddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2827 LogicVRegister Simulator::saddl(VectorFormat vform,
2828 LogicVRegister dst,
2829 const LogicVRegister& src1,
2830 const LogicVRegister& src2) {
2831 SimVRegister temp1, temp2;
2832 sxtl(vform, temp1, src1);
2833 sxtl(vform, temp2, src2);
2834 add(vform, dst, temp1, temp2);
2835 return dst;
2836 }
2837
2838
saddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2839 LogicVRegister Simulator::saddl2(VectorFormat vform,
2840 LogicVRegister dst,
2841 const LogicVRegister& src1,
2842 const LogicVRegister& src2) {
2843 SimVRegister temp1, temp2;
2844 sxtl2(vform, temp1, src1);
2845 sxtl2(vform, temp2, src2);
2846 add(vform, dst, temp1, temp2);
2847 return dst;
2848 }
2849
2850
saddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2851 LogicVRegister Simulator::saddw(VectorFormat vform,
2852 LogicVRegister dst,
2853 const LogicVRegister& src1,
2854 const LogicVRegister& src2) {
2855 SimVRegister temp;
2856 sxtl(vform, temp, src2);
2857 add(vform, dst, src1, temp);
2858 return dst;
2859 }
2860
2861
saddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2862 LogicVRegister Simulator::saddw2(VectorFormat vform,
2863 LogicVRegister dst,
2864 const LogicVRegister& src1,
2865 const LogicVRegister& src2) {
2866 SimVRegister temp;
2867 sxtl2(vform, temp, src2);
2868 add(vform, dst, src1, temp);
2869 return dst;
2870 }
2871
2872
usubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2873 LogicVRegister Simulator::usubl(VectorFormat vform,
2874 LogicVRegister dst,
2875 const LogicVRegister& src1,
2876 const LogicVRegister& src2) {
2877 SimVRegister temp1, temp2;
2878 uxtl(vform, temp1, src1);
2879 uxtl(vform, temp2, src2);
2880 sub(vform, dst, temp1, temp2);
2881 return dst;
2882 }
2883
2884
usubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2885 LogicVRegister Simulator::usubl2(VectorFormat vform,
2886 LogicVRegister dst,
2887 const LogicVRegister& src1,
2888 const LogicVRegister& src2) {
2889 SimVRegister temp1, temp2;
2890 uxtl2(vform, temp1, src1);
2891 uxtl2(vform, temp2, src2);
2892 sub(vform, dst, temp1, temp2);
2893 return dst;
2894 }
2895
2896
usubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2897 LogicVRegister Simulator::usubw(VectorFormat vform,
2898 LogicVRegister dst,
2899 const LogicVRegister& src1,
2900 const LogicVRegister& src2) {
2901 SimVRegister temp;
2902 uxtl(vform, temp, src2);
2903 sub(vform, dst, src1, temp);
2904 return dst;
2905 }
2906
2907
usubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2908 LogicVRegister Simulator::usubw2(VectorFormat vform,
2909 LogicVRegister dst,
2910 const LogicVRegister& src1,
2911 const LogicVRegister& src2) {
2912 SimVRegister temp;
2913 uxtl2(vform, temp, src2);
2914 sub(vform, dst, src1, temp);
2915 return dst;
2916 }
2917
2918
ssubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2919 LogicVRegister Simulator::ssubl(VectorFormat vform,
2920 LogicVRegister dst,
2921 const LogicVRegister& src1,
2922 const LogicVRegister& src2) {
2923 SimVRegister temp1, temp2;
2924 sxtl(vform, temp1, src1);
2925 sxtl(vform, temp2, src2);
2926 sub(vform, dst, temp1, temp2);
2927 return dst;
2928 }
2929
2930
ssubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2931 LogicVRegister Simulator::ssubl2(VectorFormat vform,
2932 LogicVRegister dst,
2933 const LogicVRegister& src1,
2934 const LogicVRegister& src2) {
2935 SimVRegister temp1, temp2;
2936 sxtl2(vform, temp1, src1);
2937 sxtl2(vform, temp2, src2);
2938 sub(vform, dst, temp1, temp2);
2939 return dst;
2940 }
2941
2942
ssubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2943 LogicVRegister Simulator::ssubw(VectorFormat vform,
2944 LogicVRegister dst,
2945 const LogicVRegister& src1,
2946 const LogicVRegister& src2) {
2947 SimVRegister temp;
2948 sxtl(vform, temp, src2);
2949 sub(vform, dst, src1, temp);
2950 return dst;
2951 }
2952
2953
ssubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2954 LogicVRegister Simulator::ssubw2(VectorFormat vform,
2955 LogicVRegister dst,
2956 const LogicVRegister& src1,
2957 const LogicVRegister& src2) {
2958 SimVRegister temp;
2959 sxtl2(vform, temp, src2);
2960 sub(vform, dst, src1, temp);
2961 return dst;
2962 }
2963
2964
uabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2965 LogicVRegister Simulator::uabal(VectorFormat vform,
2966 LogicVRegister dst,
2967 const LogicVRegister& src1,
2968 const LogicVRegister& src2) {
2969 SimVRegister temp1, temp2;
2970 uxtl(vform, temp1, src1);
2971 uxtl(vform, temp2, src2);
2972 uaba(vform, dst, temp1, temp2);
2973 return dst;
2974 }
2975
2976
uabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2977 LogicVRegister Simulator::uabal2(VectorFormat vform,
2978 LogicVRegister dst,
2979 const LogicVRegister& src1,
2980 const LogicVRegister& src2) {
2981 SimVRegister temp1, temp2;
2982 uxtl2(vform, temp1, src1);
2983 uxtl2(vform, temp2, src2);
2984 uaba(vform, dst, temp1, temp2);
2985 return dst;
2986 }
2987
2988
sabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2989 LogicVRegister Simulator::sabal(VectorFormat vform,
2990 LogicVRegister dst,
2991 const LogicVRegister& src1,
2992 const LogicVRegister& src2) {
2993 SimVRegister temp1, temp2;
2994 sxtl(vform, temp1, src1);
2995 sxtl(vform, temp2, src2);
2996 saba(vform, dst, temp1, temp2);
2997 return dst;
2998 }
2999
3000
sabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3001 LogicVRegister Simulator::sabal2(VectorFormat vform,
3002 LogicVRegister dst,
3003 const LogicVRegister& src1,
3004 const LogicVRegister& src2) {
3005 SimVRegister temp1, temp2;
3006 sxtl2(vform, temp1, src1);
3007 sxtl2(vform, temp2, src2);
3008 saba(vform, dst, temp1, temp2);
3009 return dst;
3010 }
3011
3012
uabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3013 LogicVRegister Simulator::uabdl(VectorFormat vform,
3014 LogicVRegister dst,
3015 const LogicVRegister& src1,
3016 const LogicVRegister& src2) {
3017 SimVRegister temp1, temp2;
3018 uxtl(vform, temp1, src1);
3019 uxtl(vform, temp2, src2);
3020 absdiff(vform, dst, temp1, temp2, false);
3021 return dst;
3022 }
3023
3024
uabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3025 LogicVRegister Simulator::uabdl2(VectorFormat vform,
3026 LogicVRegister dst,
3027 const LogicVRegister& src1,
3028 const LogicVRegister& src2) {
3029 SimVRegister temp1, temp2;
3030 uxtl2(vform, temp1, src1);
3031 uxtl2(vform, temp2, src2);
3032 absdiff(vform, dst, temp1, temp2, false);
3033 return dst;
3034 }
3035
3036
sabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3037 LogicVRegister Simulator::sabdl(VectorFormat vform,
3038 LogicVRegister dst,
3039 const LogicVRegister& src1,
3040 const LogicVRegister& src2) {
3041 SimVRegister temp1, temp2;
3042 sxtl(vform, temp1, src1);
3043 sxtl(vform, temp2, src2);
3044 absdiff(vform, dst, temp1, temp2, true);
3045 return dst;
3046 }
3047
3048
sabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3049 LogicVRegister Simulator::sabdl2(VectorFormat vform,
3050 LogicVRegister dst,
3051 const LogicVRegister& src1,
3052 const LogicVRegister& src2) {
3053 SimVRegister temp1, temp2;
3054 sxtl2(vform, temp1, src1);
3055 sxtl2(vform, temp2, src2);
3056 absdiff(vform, dst, temp1, temp2, true);
3057 return dst;
3058 }
3059
3060
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3061 LogicVRegister Simulator::umull(VectorFormat vform,
3062 LogicVRegister dst,
3063 const LogicVRegister& src1,
3064 const LogicVRegister& src2) {
3065 SimVRegister temp1, temp2;
3066 uxtl(vform, temp1, src1);
3067 uxtl(vform, temp2, src2);
3068 mul(vform, dst, temp1, temp2);
3069 return dst;
3070 }
3071
3072
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3073 LogicVRegister Simulator::umull2(VectorFormat vform,
3074 LogicVRegister dst,
3075 const LogicVRegister& src1,
3076 const LogicVRegister& src2) {
3077 SimVRegister temp1, temp2;
3078 uxtl2(vform, temp1, src1);
3079 uxtl2(vform, temp2, src2);
3080 mul(vform, dst, temp1, temp2);
3081 return dst;
3082 }
3083
3084
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3085 LogicVRegister Simulator::smull(VectorFormat vform,
3086 LogicVRegister dst,
3087 const LogicVRegister& src1,
3088 const LogicVRegister& src2) {
3089 SimVRegister temp1, temp2;
3090 sxtl(vform, temp1, src1);
3091 sxtl(vform, temp2, src2);
3092 mul(vform, dst, temp1, temp2);
3093 return dst;
3094 }
3095
3096
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3097 LogicVRegister Simulator::smull2(VectorFormat vform,
3098 LogicVRegister dst,
3099 const LogicVRegister& src1,
3100 const LogicVRegister& src2) {
3101 SimVRegister temp1, temp2;
3102 sxtl2(vform, temp1, src1);
3103 sxtl2(vform, temp2, src2);
3104 mul(vform, dst, temp1, temp2);
3105 return dst;
3106 }
3107
3108
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3109 LogicVRegister Simulator::umlsl(VectorFormat vform,
3110 LogicVRegister dst,
3111 const LogicVRegister& src1,
3112 const LogicVRegister& src2) {
3113 SimVRegister temp1, temp2;
3114 uxtl(vform, temp1, src1);
3115 uxtl(vform, temp2, src2);
3116 mls(vform, dst, temp1, temp2);
3117 return dst;
3118 }
3119
3120
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3121 LogicVRegister Simulator::umlsl2(VectorFormat vform,
3122 LogicVRegister dst,
3123 const LogicVRegister& src1,
3124 const LogicVRegister& src2) {
3125 SimVRegister temp1, temp2;
3126 uxtl2(vform, temp1, src1);
3127 uxtl2(vform, temp2, src2);
3128 mls(vform, dst, temp1, temp2);
3129 return dst;
3130 }
3131
3132
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3133 LogicVRegister Simulator::smlsl(VectorFormat vform,
3134 LogicVRegister dst,
3135 const LogicVRegister& src1,
3136 const LogicVRegister& src2) {
3137 SimVRegister temp1, temp2;
3138 sxtl(vform, temp1, src1);
3139 sxtl(vform, temp2, src2);
3140 mls(vform, dst, temp1, temp2);
3141 return dst;
3142 }
3143
3144
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3145 LogicVRegister Simulator::smlsl2(VectorFormat vform,
3146 LogicVRegister dst,
3147 const LogicVRegister& src1,
3148 const LogicVRegister& src2) {
3149 SimVRegister temp1, temp2;
3150 sxtl2(vform, temp1, src1);
3151 sxtl2(vform, temp2, src2);
3152 mls(vform, dst, temp1, temp2);
3153 return dst;
3154 }
3155
3156
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3157 LogicVRegister Simulator::umlal(VectorFormat vform,
3158 LogicVRegister dst,
3159 const LogicVRegister& src1,
3160 const LogicVRegister& src2) {
3161 SimVRegister temp1, temp2;
3162 uxtl(vform, temp1, src1);
3163 uxtl(vform, temp2, src2);
3164 mla(vform, dst, temp1, temp2);
3165 return dst;
3166 }
3167
3168
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3169 LogicVRegister Simulator::umlal2(VectorFormat vform,
3170 LogicVRegister dst,
3171 const LogicVRegister& src1,
3172 const LogicVRegister& src2) {
3173 SimVRegister temp1, temp2;
3174 uxtl2(vform, temp1, src1);
3175 uxtl2(vform, temp2, src2);
3176 mla(vform, dst, temp1, temp2);
3177 return dst;
3178 }
3179
3180
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3181 LogicVRegister Simulator::smlal(VectorFormat vform,
3182 LogicVRegister dst,
3183 const LogicVRegister& src1,
3184 const LogicVRegister& src2) {
3185 SimVRegister temp1, temp2;
3186 sxtl(vform, temp1, src1);
3187 sxtl(vform, temp2, src2);
3188 mla(vform, dst, temp1, temp2);
3189 return dst;
3190 }
3191
3192
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3193 LogicVRegister Simulator::smlal2(VectorFormat vform,
3194 LogicVRegister dst,
3195 const LogicVRegister& src1,
3196 const LogicVRegister& src2) {
3197 SimVRegister temp1, temp2;
3198 sxtl2(vform, temp1, src1);
3199 sxtl2(vform, temp2, src2);
3200 mla(vform, dst, temp1, temp2);
3201 return dst;
3202 }
3203
3204
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3205 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
3206 LogicVRegister dst,
3207 const LogicVRegister& src1,
3208 const LogicVRegister& src2) {
3209 SimVRegister temp;
3210 LogicVRegister product = sqdmull(vform, temp, src1, src2);
3211 return add(vform, dst, dst, product).SignedSaturate(vform);
3212 }
3213
3214
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3215 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
3216 LogicVRegister dst,
3217 const LogicVRegister& src1,
3218 const LogicVRegister& src2) {
3219 SimVRegister temp;
3220 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3221 return add(vform, dst, dst, product).SignedSaturate(vform);
3222 }
3223
3224
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3225 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
3226 LogicVRegister dst,
3227 const LogicVRegister& src1,
3228 const LogicVRegister& src2) {
3229 SimVRegister temp;
3230 LogicVRegister product = sqdmull(vform, temp, src1, src2);
3231 return sub(vform, dst, dst, product).SignedSaturate(vform);
3232 }
3233
3234
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3235 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
3236 LogicVRegister dst,
3237 const LogicVRegister& src1,
3238 const LogicVRegister& src2) {
3239 SimVRegister temp;
3240 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3241 return sub(vform, dst, dst, product).SignedSaturate(vform);
3242 }
3243
3244
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3245 LogicVRegister Simulator::sqdmull(VectorFormat vform,
3246 LogicVRegister dst,
3247 const LogicVRegister& src1,
3248 const LogicVRegister& src2) {
3249 SimVRegister temp;
3250 LogicVRegister product = smull(vform, temp, src1, src2);
3251 return add(vform, dst, product, product).SignedSaturate(vform);
3252 }
3253
3254
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3255 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
3256 LogicVRegister dst,
3257 const LogicVRegister& src1,
3258 const LogicVRegister& src2) {
3259 SimVRegister temp;
3260 LogicVRegister product = smull2(vform, temp, src1, src2);
3261 return add(vform, dst, product, product).SignedSaturate(vform);
3262 }
3263
3264
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)3265 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
3266 LogicVRegister dst,
3267 const LogicVRegister& src1,
3268 const LogicVRegister& src2,
3269 bool round) {
3270 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
3271 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
3272 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
3273
3274 int esize = LaneSizeInBitsFromFormat(vform);
3275 int round_const = round ? (1 << (esize - 2)) : 0;
3276 int64_t product;
3277
3278 dst.ClearForWrite(vform);
3279 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3280 product = src1.Int(vform, i) * src2.Int(vform, i);
3281 product += round_const;
3282 product = product >> (esize - 1);
3283
3284 if (product > MaxIntFromFormat(vform)) {
3285 product = MaxIntFromFormat(vform);
3286 } else if (product < MinIntFromFormat(vform)) {
3287 product = MinIntFromFormat(vform);
3288 }
3289 dst.SetInt(vform, i, product);
3290 }
3291 return dst;
3292 }
3293
3294
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3295 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
3296 LogicVRegister dst,
3297 const LogicVRegister& src1,
3298 const LogicVRegister& src2) {
3299 return sqrdmulh(vform, dst, src1, src2, false);
3300 }
3301
3302
addhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3303 LogicVRegister Simulator::addhn(VectorFormat vform,
3304 LogicVRegister dst,
3305 const LogicVRegister& src1,
3306 const LogicVRegister& src2) {
3307 SimVRegister temp;
3308 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
3309 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3310 return dst;
3311 }
3312
3313
addhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3314 LogicVRegister Simulator::addhn2(VectorFormat vform,
3315 LogicVRegister dst,
3316 const LogicVRegister& src1,
3317 const LogicVRegister& src2) {
3318 SimVRegister temp;
3319 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3320 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3321 return dst;
3322 }
3323
3324
raddhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3325 LogicVRegister Simulator::raddhn(VectorFormat vform,
3326 LogicVRegister dst,
3327 const LogicVRegister& src1,
3328 const LogicVRegister& src2) {
3329 SimVRegister temp;
3330 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
3331 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3332 return dst;
3333 }
3334
3335
raddhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3336 LogicVRegister Simulator::raddhn2(VectorFormat vform,
3337 LogicVRegister dst,
3338 const LogicVRegister& src1,
3339 const LogicVRegister& src2) {
3340 SimVRegister temp;
3341 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3342 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3343 return dst;
3344 }
3345
3346
subhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3347 LogicVRegister Simulator::subhn(VectorFormat vform,
3348 LogicVRegister dst,
3349 const LogicVRegister& src1,
3350 const LogicVRegister& src2) {
3351 SimVRegister temp;
3352 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
3353 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3354 return dst;
3355 }
3356
3357
subhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3358 LogicVRegister Simulator::subhn2(VectorFormat vform,
3359 LogicVRegister dst,
3360 const LogicVRegister& src1,
3361 const LogicVRegister& src2) {
3362 SimVRegister temp;
3363 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3364 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3365 return dst;
3366 }
3367
3368
rsubhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3369 LogicVRegister Simulator::rsubhn(VectorFormat vform,
3370 LogicVRegister dst,
3371 const LogicVRegister& src1,
3372 const LogicVRegister& src2) {
3373 SimVRegister temp;
3374 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
3375 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3376 return dst;
3377 }
3378
3379
rsubhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3380 LogicVRegister Simulator::rsubhn2(VectorFormat vform,
3381 LogicVRegister dst,
3382 const LogicVRegister& src1,
3383 const LogicVRegister& src2) {
3384 SimVRegister temp;
3385 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3386 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3387 return dst;
3388 }
3389
3390
trn1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3391 LogicVRegister Simulator::trn1(VectorFormat vform,
3392 LogicVRegister dst,
3393 const LogicVRegister& src1,
3394 const LogicVRegister& src2) {
3395 uint64_t result[16];
3396 int laneCount = LaneCountFromFormat(vform);
3397 int pairs = laneCount / 2;
3398 for (int i = 0; i < pairs; ++i) {
3399 result[2 * i] = src1.Uint(vform, 2 * i);
3400 result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
3401 }
3402
3403 dst.ClearForWrite(vform);
3404 for (int i = 0; i < laneCount; ++i) {
3405 dst.SetUint(vform, i, result[i]);
3406 }
3407 return dst;
3408 }
3409
3410
trn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3411 LogicVRegister Simulator::trn2(VectorFormat vform,
3412 LogicVRegister dst,
3413 const LogicVRegister& src1,
3414 const LogicVRegister& src2) {
3415 uint64_t result[16];
3416 int laneCount = LaneCountFromFormat(vform);
3417 int pairs = laneCount / 2;
3418 for (int i = 0; i < pairs; ++i) {
3419 result[2 * i] = src1.Uint(vform, (2 * i) + 1);
3420 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
3421 }
3422
3423 dst.ClearForWrite(vform);
3424 for (int i = 0; i < laneCount; ++i) {
3425 dst.SetUint(vform, i, result[i]);
3426 }
3427 return dst;
3428 }
3429
3430
zip1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3431 LogicVRegister Simulator::zip1(VectorFormat vform,
3432 LogicVRegister dst,
3433 const LogicVRegister& src1,
3434 const LogicVRegister& src2) {
3435 uint64_t result[16];
3436 int laneCount = LaneCountFromFormat(vform);
3437 int pairs = laneCount / 2;
3438 for (int i = 0; i < pairs; ++i) {
3439 result[2 * i] = src1.Uint(vform, i);
3440 result[(2 * i) + 1] = src2.Uint(vform, i);
3441 }
3442
3443 dst.ClearForWrite(vform);
3444 for (int i = 0; i < laneCount; ++i) {
3445 dst.SetUint(vform, i, result[i]);
3446 }
3447 return dst;
3448 }
3449
3450
zip2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3451 LogicVRegister Simulator::zip2(VectorFormat vform,
3452 LogicVRegister dst,
3453 const LogicVRegister& src1,
3454 const LogicVRegister& src2) {
3455 uint64_t result[16];
3456 int laneCount = LaneCountFromFormat(vform);
3457 int pairs = laneCount / 2;
3458 for (int i = 0; i < pairs; ++i) {
3459 result[2 * i] = src1.Uint(vform, pairs + i);
3460 result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
3461 }
3462
3463 dst.ClearForWrite(vform);
3464 for (int i = 0; i < laneCount; ++i) {
3465 dst.SetUint(vform, i, result[i]);
3466 }
3467 return dst;
3468 }
3469
3470
uzp1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3471 LogicVRegister Simulator::uzp1(VectorFormat vform,
3472 LogicVRegister dst,
3473 const LogicVRegister& src1,
3474 const LogicVRegister& src2) {
3475 uint64_t result[32];
3476 int laneCount = LaneCountFromFormat(vform);
3477 for (int i = 0; i < laneCount; ++i) {
3478 result[i] = src1.Uint(vform, i);
3479 result[laneCount + i] = src2.Uint(vform, i);
3480 }
3481
3482 dst.ClearForWrite(vform);
3483 for (int i = 0; i < laneCount; ++i) {
3484 dst.SetUint(vform, i, result[2 * i]);
3485 }
3486 return dst;
3487 }
3488
3489
uzp2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3490 LogicVRegister Simulator::uzp2(VectorFormat vform,
3491 LogicVRegister dst,
3492 const LogicVRegister& src1,
3493 const LogicVRegister& src2) {
3494 uint64_t result[32];
3495 int laneCount = LaneCountFromFormat(vform);
3496 for (int i = 0; i < laneCount; ++i) {
3497 result[i] = src1.Uint(vform, i);
3498 result[laneCount + i] = src2.Uint(vform, i);
3499 }
3500
3501 dst.ClearForWrite(vform);
3502 for (int i = 0; i < laneCount; ++i) {
3503 dst.SetUint(vform, i, result[ (2 * i) + 1]);
3504 }
3505 return dst;
3506 }
3507
3508
3509 template <typename T>
FPAdd(T op1,T op2)3510 T Simulator::FPAdd(T op1, T op2) {
3511 T result = FPProcessNaNs(op1, op2);
3512 if (std::isnan(result)) return result;
3513
3514 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
3515 // inf + -inf returns the default NaN.
3516 FPProcessException();
3517 return FPDefaultNaN<T>();
3518 } else {
3519 // Other cases should be handled by standard arithmetic.
3520 return op1 + op2;
3521 }
3522 }
3523
3524
3525 template <typename T>
FPSub(T op1,T op2)3526 T Simulator::FPSub(T op1, T op2) {
3527 // NaNs should be handled elsewhere.
3528 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3529
3530 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
3531 // inf - inf returns the default NaN.
3532 FPProcessException();
3533 return FPDefaultNaN<T>();
3534 } else {
3535 // Other cases should be handled by standard arithmetic.
3536 return op1 - op2;
3537 }
3538 }
3539
3540
3541 template <typename T>
FPMul(T op1,T op2)3542 T Simulator::FPMul(T op1, T op2) {
3543 // NaNs should be handled elsewhere.
3544 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3545
3546 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
3547 // inf * 0.0 returns the default NaN.
3548 FPProcessException();
3549 return FPDefaultNaN<T>();
3550 } else {
3551 // Other cases should be handled by standard arithmetic.
3552 return op1 * op2;
3553 }
3554 }
3555
3556
3557 template<typename T>
FPMulx(T op1,T op2)3558 T Simulator::FPMulx(T op1, T op2) {
3559 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
3560 // inf * 0.0 returns +/-2.0.
3561 T two = 2.0;
3562 return copysign(1.0, op1) * copysign(1.0, op2) * two;
3563 }
3564 return FPMul(op1, op2);
3565 }
3566
3567
3568 template<typename T>
FPMulAdd(T a,T op1,T op2)3569 T Simulator::FPMulAdd(T a, T op1, T op2) {
3570 T result = FPProcessNaNs3(a, op1, op2);
3571
3572 T sign_a = copysign(1.0, a);
3573 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
3574 bool isinf_prod = std::isinf(op1) || std::isinf(op2);
3575 bool operation_generates_nan =
3576 (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0
3577 (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf
3578 (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
3579
3580 if (std::isnan(result)) {
3581 // Generated NaNs override quiet NaNs propagated from a.
3582 if (operation_generates_nan && IsQuietNaN(a)) {
3583 FPProcessException();
3584 return FPDefaultNaN<T>();
3585 } else {
3586 return result;
3587 }
3588 }
3589
3590 // If the operation would produce a NaN, return the default NaN.
3591 if (operation_generates_nan) {
3592 FPProcessException();
3593 return FPDefaultNaN<T>();
3594 }
3595
3596 // Work around broken fma implementations for exact zero results: The sign of
3597 // exact 0.0 results is positive unless both a and op1 * op2 are negative.
3598 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
3599 return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
3600 }
3601
3602 result = FusedMultiplyAdd(op1, op2, a);
3603 VIXL_ASSERT(!std::isnan(result));
3604
3605 // Work around broken fma implementations for rounded zero results: If a is
3606 // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
3607 if ((a == 0.0) && (result == 0.0)) {
3608 return copysign(0.0, sign_prod);
3609 }
3610
3611 return result;
3612 }
3613
3614
3615 template <typename T>
FPDiv(T op1,T op2)3616 T Simulator::FPDiv(T op1, T op2) {
3617 // NaNs should be handled elsewhere.
3618 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3619
3620 if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
3621 // inf / inf and 0.0 / 0.0 return the default NaN.
3622 FPProcessException();
3623 return FPDefaultNaN<T>();
3624 } else {
3625 if (op2 == 0.0) FPProcessException();
3626
3627 // Other cases should be handled by standard arithmetic.
3628 return op1 / op2;
3629 }
3630 }
3631
3632
3633 template <typename T>
FPSqrt(T op)3634 T Simulator::FPSqrt(T op) {
3635 if (std::isnan(op)) {
3636 return FPProcessNaN(op);
3637 } else if (op < 0.0) {
3638 FPProcessException();
3639 return FPDefaultNaN<T>();
3640 } else {
3641 return sqrt(op);
3642 }
3643 }
3644
3645
3646 template <typename T>
FPMax(T a,T b)3647 T Simulator::FPMax(T a, T b) {
3648 T result = FPProcessNaNs(a, b);
3649 if (std::isnan(result)) return result;
3650
3651 if ((a == 0.0) && (b == 0.0) &&
3652 (copysign(1.0, a) != copysign(1.0, b))) {
3653 // a and b are zero, and the sign differs: return +0.0.
3654 return 0.0;
3655 } else {
3656 return (a > b) ? a : b;
3657 }
3658 }
3659
3660
3661 template <typename T>
FPMaxNM(T a,T b)3662 T Simulator::FPMaxNM(T a, T b) {
3663 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3664 a = kFP64NegativeInfinity;
3665 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3666 b = kFP64NegativeInfinity;
3667 }
3668
3669 T result = FPProcessNaNs(a, b);
3670 return std::isnan(result) ? result : FPMax(a, b);
3671 }
3672
3673
3674 template <typename T>
FPMin(T a,T b)3675 T Simulator::FPMin(T a, T b) {
3676 T result = FPProcessNaNs(a, b);
3677 if (std::isnan(result)) return result;
3678
3679 if ((a == 0.0) && (b == 0.0) &&
3680 (copysign(1.0, a) != copysign(1.0, b))) {
3681 // a and b are zero, and the sign differs: return -0.0.
3682 return -0.0;
3683 } else {
3684 return (a < b) ? a : b;
3685 }
3686 }
3687
3688
3689 template <typename T>
FPMinNM(T a,T b)3690 T Simulator::FPMinNM(T a, T b) {
3691 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3692 a = kFP64PositiveInfinity;
3693 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3694 b = kFP64PositiveInfinity;
3695 }
3696
3697 T result = FPProcessNaNs(a, b);
3698 return std::isnan(result) ? result : FPMin(a, b);
3699 }
3700
3701
3702 template <typename T>
FPRecipStepFused(T op1,T op2)3703 T Simulator::FPRecipStepFused(T op1, T op2) {
3704 const T two = 2.0;
3705 if ((std::isinf(op1) && (op2 == 0.0))
3706 || ((op1 == 0.0) && (std::isinf(op2)))) {
3707 return two;
3708 } else if (std::isinf(op1) || std::isinf(op2)) {
3709 // Return +inf if signs match, otherwise -inf.
3710 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3711 : kFP64NegativeInfinity;
3712 } else {
3713 return FusedMultiplyAdd(op1, op2, two);
3714 }
3715 }
3716
3717
3718 template <typename T>
FPRSqrtStepFused(T op1,T op2)3719 T Simulator::FPRSqrtStepFused(T op1, T op2) {
3720 const T one_point_five = 1.5;
3721 const T two = 2.0;
3722
3723 if ((std::isinf(op1) && (op2 == 0.0))
3724 || ((op1 == 0.0) && (std::isinf(op2)))) {
3725 return one_point_five;
3726 } else if (std::isinf(op1) || std::isinf(op2)) {
3727 // Return +inf if signs match, otherwise -inf.
3728 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3729 : kFP64NegativeInfinity;
3730 } else {
3731 // The multiply-add-halve operation must be fully fused, so avoid interim
3732 // rounding by checking which operand can be losslessly divided by two
3733 // before doing the multiply-add.
3734 if (std::isnormal(op1 / two)) {
3735 return FusedMultiplyAdd(op1 / two, op2, one_point_five);
3736 } else if (std::isnormal(op2 / two)) {
3737 return FusedMultiplyAdd(op1, op2 / two, one_point_five);
3738 } else {
3739 // Neither operand is normal after halving: the result is dominated by
3740 // the addition term, so just return that.
3741 return one_point_five;
3742 }
3743 }
3744 }
3745
3746
FPRoundInt(double value,FPRounding round_mode)3747 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
3748 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3749 (value == kFP64NegativeInfinity)) {
3750 return value;
3751 } else if (std::isnan(value)) {
3752 return FPProcessNaN(value);
3753 }
3754
3755 double int_result = std::floor(value);
3756 double error = value - int_result;
3757 switch (round_mode) {
3758 case FPTieAway: {
3759 // Take care of correctly handling the range ]-0.5, -0.0], which must
3760 // yield -0.0.
3761 if ((-0.5 < value) && (value < 0.0)) {
3762 int_result = -0.0;
3763
3764 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
3765 // If the error is greater than 0.5, or is equal to 0.5 and the integer
3766 // result is positive, round up.
3767 int_result++;
3768 }
3769 break;
3770 }
3771 case FPTieEven: {
3772 // Take care of correctly handling the range [-0.5, -0.0], which must
3773 // yield -0.0.
3774 if ((-0.5 <= value) && (value < 0.0)) {
3775 int_result = -0.0;
3776
3777 // If the error is greater than 0.5, or is equal to 0.5 and the integer
3778 // result is odd, round up.
3779 } else if ((error > 0.5) ||
3780 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
3781 int_result++;
3782 }
3783 break;
3784 }
3785 case FPZero: {
3786 // If value>0 then we take floor(value)
3787 // otherwise, ceil(value).
3788 if (value < 0) {
3789 int_result = ceil(value);
3790 }
3791 break;
3792 }
3793 case FPNegativeInfinity: {
3794 // We always use floor(value).
3795 break;
3796 }
3797 case FPPositiveInfinity: {
3798 // Take care of correctly handling the range ]-1.0, -0.0], which must
3799 // yield -0.0.
3800 if ((-1.0 < value) && (value < 0.0)) {
3801 int_result = -0.0;
3802
3803 // If the error is non-zero, round up.
3804 } else if (error > 0.0) {
3805 int_result++;
3806 }
3807 break;
3808 }
3809 default: VIXL_UNIMPLEMENTED();
3810 }
3811 return int_result;
3812 }
3813
3814
FPToInt32(double value,FPRounding rmode)3815 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
3816 value = FPRoundInt(value, rmode);
3817 if (value >= kWMaxInt) {
3818 return kWMaxInt;
3819 } else if (value < kWMinInt) {
3820 return kWMinInt;
3821 }
3822 return std::isnan(value) ? 0 : static_cast<int32_t>(value);
3823 }
3824
3825
FPToInt64(double value,FPRounding rmode)3826 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
3827 value = FPRoundInt(value, rmode);
3828 if (value >= kXMaxInt) {
3829 return kXMaxInt;
3830 } else if (value < kXMinInt) {
3831 return kXMinInt;
3832 }
3833 return std::isnan(value) ? 0 : static_cast<int64_t>(value);
3834 }
3835
3836
FPToUInt32(double value,FPRounding rmode)3837 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
3838 value = FPRoundInt(value, rmode);
3839 if (value >= kWMaxUInt) {
3840 return kWMaxUInt;
3841 } else if (value < 0.0) {
3842 return 0;
3843 }
3844 return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
3845 }
3846
3847
FPToUInt64(double value,FPRounding rmode)3848 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
3849 value = FPRoundInt(value, rmode);
3850 if (value >= kXMaxUInt) {
3851 return kXMaxUInt;
3852 } else if (value < 0.0) {
3853 return 0;
3854 }
3855 return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
3856 }
3857
3858
// Generates, for each (FN, OP, PROCNAN) entry supplied by NEON_FP3SAME_LIST,
// two Simulator member functions named FN:
//  * a template <typename T> version that clears dst for vform and then, for
//    every lane, applies OP to the matching lanes of src1 and src2. When
//    PROCNAN is true the operands first go through FPProcessNaNs and OP is
//    only called if that result is not a NaN; otherwise OP is called directly.
//  * a non-template version that dispatches to FN<float> for S-sized lanes
//    and to FN<double> for D-sized lanes.
// No comments are placed inside the macro body because every line is joined
// by a trailing backslash.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                \
  template <typename T>                                          \
  LogicVRegister Simulator::FN(VectorFormat vform,               \
                               LogicVRegister dst,               \
                               const LogicVRegister& src1,       \
                               const LogicVRegister& src2) {     \
    dst.ClearForWrite(vform);                                    \
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {       \
      T op1 = src1.Float<T>(i);                                  \
      T op2 = src2.Float<T>(i);                                  \
      T result;                                                  \
      if (PROCNAN) {                                             \
        result = FPProcessNaNs(op1, op2);                        \
        if (!std::isnan(result)) {                               \
          result = OP(op1, op2);                                 \
        }                                                        \
      } else {                                                   \
        result = OP(op1, op2);                                   \
      }                                                          \
      dst.SetFloat(i, result);                                   \
    }                                                            \
    return dst;                                                  \
  }                                                              \
                                                                 \
  LogicVRegister Simulator::FN(VectorFormat vform,               \
                               LogicVRegister dst,               \
                               const LogicVRegister& src1,       \
                               const LogicVRegister& src2) {     \
    if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {          \
      FN<float>(vform, dst, src1, src2);                         \
    } else {                                                     \
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \
      FN<double>(vform, dst, src1, src2);                        \
    }                                                            \
    return dst;                                                  \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
3897
3898
3899 LogicVRegister Simulator::fnmul(VectorFormat vform,
3900 LogicVRegister dst,
3901 const LogicVRegister& src1,
3902 const LogicVRegister& src2) {
3903 SimVRegister temp;
3904 LogicVRegister product = fmul(vform, temp, src1, src2);
3905 return fneg(vform, dst, product);
3906 }
3907
3908
3909 template <typename T>
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3910 LogicVRegister Simulator::frecps(VectorFormat vform,
3911 LogicVRegister dst,
3912 const LogicVRegister& src1,
3913 const LogicVRegister& src2) {
3914 dst.ClearForWrite(vform);
3915 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3916 T op1 = -src1.Float<T>(i);
3917 T op2 = src2.Float<T>(i);
3918 T result = FPProcessNaNs(op1, op2);
3919 dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
3920 }
3921 return dst;
3922 }
3923
3924
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3925 LogicVRegister Simulator::frecps(VectorFormat vform,
3926 LogicVRegister dst,
3927 const LogicVRegister& src1,
3928 const LogicVRegister& src2) {
3929 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
3930 frecps<float>(vform, dst, src1, src2);
3931 } else {
3932 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
3933 frecps<double>(vform, dst, src1, src2);
3934 }
3935 return dst;
3936 }
3937
3938
3939 template <typename T>
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3940 LogicVRegister Simulator::frsqrts(VectorFormat vform,
3941 LogicVRegister dst,
3942 const LogicVRegister& src1,
3943 const LogicVRegister& src2) {
3944 dst.ClearForWrite(vform);
3945 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3946 T op1 = -src1.Float<T>(i);
3947 T op2 = src2.Float<T>(i);
3948 T result = FPProcessNaNs(op1, op2);
3949 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
3950 }
3951 return dst;
3952 }
3953
3954
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3955 LogicVRegister Simulator::frsqrts(VectorFormat vform,
3956 LogicVRegister dst,
3957 const LogicVRegister& src1,
3958 const LogicVRegister& src2) {
3959 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
3960 frsqrts<float>(vform, dst, src1, src2);
3961 } else {
3962 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
3963 frsqrts<double>(vform, dst, src1, src2);
3964 }
3965 return dst;
3966 }
3967
3968
3969 template <typename T>
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)3970 LogicVRegister Simulator::fcmp(VectorFormat vform,
3971 LogicVRegister dst,
3972 const LogicVRegister& src1,
3973 const LogicVRegister& src2,
3974 Condition cond) {
3975 dst.ClearForWrite(vform);
3976 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3977 bool result = false;
3978 T op1 = src1.Float<T>(i);
3979 T op2 = src2.Float<T>(i);
3980 T nan_result = FPProcessNaNs(op1, op2);
3981 if (!std::isnan(nan_result)) {
3982 switch (cond) {
3983 case eq: result = (op1 == op2); break;
3984 case ge: result = (op1 >= op2); break;
3985 case gt: result = (op1 > op2) ; break;
3986 case le: result = (op1 <= op2); break;
3987 case lt: result = (op1 < op2) ; break;
3988 default: VIXL_UNREACHABLE(); break;
3989 }
3990 }
3991 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
3992 }
3993 return dst;
3994 }
3995
3996
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)3997 LogicVRegister Simulator::fcmp(VectorFormat vform,
3998 LogicVRegister dst,
3999 const LogicVRegister& src1,
4000 const LogicVRegister& src2,
4001 Condition cond) {
4002 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4003 fcmp<float>(vform, dst, src1, src2, cond);
4004 } else {
4005 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4006 fcmp<double>(vform, dst, src1, src2, cond);
4007 }
4008 return dst;
4009 }
4010
4011
fcmp_zero(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,Condition cond)4012 LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
4013 LogicVRegister dst,
4014 const LogicVRegister& src,
4015 Condition cond) {
4016 SimVRegister temp;
4017 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4018 LogicVRegister zero_reg = dup_immediate(vform, temp, float_to_rawbits(0.0));
4019 fcmp<float>(vform, dst, src, zero_reg, cond);
4020 } else {
4021 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4022 LogicVRegister zero_reg = dup_immediate(vform, temp,
4023 double_to_rawbits(0.0));
4024 fcmp<double>(vform, dst, src, zero_reg, cond);
4025 }
4026 return dst;
4027 }
4028
4029
fabscmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)4030 LogicVRegister Simulator::fabscmp(VectorFormat vform,
4031 LogicVRegister dst,
4032 const LogicVRegister& src1,
4033 const LogicVRegister& src2,
4034 Condition cond) {
4035 SimVRegister temp1, temp2;
4036 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4037 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
4038 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
4039 fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
4040 } else {
4041 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4042 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
4043 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
4044 fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
4045 }
4046 return dst;
4047 }
4048
4049
4050 template <typename T>
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4051 LogicVRegister Simulator::fmla(VectorFormat vform,
4052 LogicVRegister dst,
4053 const LogicVRegister& src1,
4054 const LogicVRegister& src2) {
4055 dst.ClearForWrite(vform);
4056 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4057 T op1 = src1.Float<T>(i);
4058 T op2 = src2.Float<T>(i);
4059 T acc = dst.Float<T>(i);
4060 T result = FPMulAdd(acc, op1, op2);
4061 dst.SetFloat(i, result);
4062 }
4063 return dst;
4064 }
4065
4066
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4067 LogicVRegister Simulator::fmla(VectorFormat vform,
4068 LogicVRegister dst,
4069 const LogicVRegister& src1,
4070 const LogicVRegister& src2) {
4071 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4072 fmla<float>(vform, dst, src1, src2);
4073 } else {
4074 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4075 fmla<double>(vform, dst, src1, src2);
4076 }
4077 return dst;
4078 }
4079
4080
4081 template <typename T>
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4082 LogicVRegister Simulator::fmls(VectorFormat vform,
4083 LogicVRegister dst,
4084 const LogicVRegister& src1,
4085 const LogicVRegister& src2) {
4086 dst.ClearForWrite(vform);
4087 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4088 T op1 = -src1.Float<T>(i);
4089 T op2 = src2.Float<T>(i);
4090 T acc = dst.Float<T>(i);
4091 T result = FPMulAdd(acc, op1, op2);
4092 dst.SetFloat(i, result);
4093 }
4094 return dst;
4095 }
4096
4097
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4098 LogicVRegister Simulator::fmls(VectorFormat vform,
4099 LogicVRegister dst,
4100 const LogicVRegister& src1,
4101 const LogicVRegister& src2) {
4102 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4103 fmls<float>(vform, dst, src1, src2);
4104 } else {
4105 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4106 fmls<double>(vform, dst, src1, src2);
4107 }
4108 return dst;
4109 }
4110
4111
4112 template <typename T>
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4113 LogicVRegister Simulator::fneg(VectorFormat vform,
4114 LogicVRegister dst,
4115 const LogicVRegister& src) {
4116 dst.ClearForWrite(vform);
4117 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4118 T op = src.Float<T>(i);
4119 op = -op;
4120 dst.SetFloat(i, op);
4121 }
4122 return dst;
4123 }
4124
4125
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4126 LogicVRegister Simulator::fneg(VectorFormat vform,
4127 LogicVRegister dst,
4128 const LogicVRegister& src) {
4129 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4130 fneg<float>(vform, dst, src);
4131 } else {
4132 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4133 fneg<double>(vform, dst, src);
4134 }
4135 return dst;
4136 }
4137
4138
4139 template <typename T>
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4140 LogicVRegister Simulator::fabs_(VectorFormat vform,
4141 LogicVRegister dst,
4142 const LogicVRegister& src) {
4143 dst.ClearForWrite(vform);
4144 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4145 T op = src.Float<T>(i);
4146 if (copysign(1.0, op) < 0.0) {
4147 op = -op;
4148 }
4149 dst.SetFloat(i, op);
4150 }
4151 return dst;
4152 }
4153
4154
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4155 LogicVRegister Simulator::fabs_(VectorFormat vform,
4156 LogicVRegister dst,
4157 const LogicVRegister& src) {
4158 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4159 fabs_<float>(vform, dst, src);
4160 } else {
4161 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4162 fabs_<double>(vform, dst, src);
4163 }
4164 return dst;
4165 }
4166
4167
fabd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4168 LogicVRegister Simulator::fabd(VectorFormat vform,
4169 LogicVRegister dst,
4170 const LogicVRegister& src1,
4171 const LogicVRegister& src2) {
4172 SimVRegister temp;
4173 fsub(vform, temp, src1, src2);
4174 fabs_(vform, dst, temp);
4175 return dst;
4176 }
4177
4178
fsqrt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4179 LogicVRegister Simulator::fsqrt(VectorFormat vform,
4180 LogicVRegister dst,
4181 const LogicVRegister& src) {
4182 dst.ClearForWrite(vform);
4183 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4184 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4185 float result = FPSqrt(src.Float<float>(i));
4186 dst.SetFloat(i, result);
4187 }
4188 } else {
4189 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4190 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4191 double result = FPSqrt(src.Float<double>(i));
4192 dst.SetFloat(i, result);
4193 }
4194 }
4195 return dst;
4196 }
4197
4198
// Generates, for each (FNP, FN, OP) entry supplied by NEON_FPPAIRWISE_LIST,
// two Simulator member functions named FNP:
//  * a vector form that de-interleaves src1/src2 with uzp1 and uzp2 into two
//    scratch registers and then applies the lane-wise operation FN to them.
//  * a scalar form that applies OP to lanes 0 and 1 of src and stores the
//    result in lane 0 of dst. vform must be kFormatS (float lanes) or
//    kFormatD (double lanes). ClearForWrite is called only after the result
//    has been stored, i.e. after both source lanes have been read.
// No comments are placed inside the macro body because every line is joined
// by a trailing backslash.
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                            \
  LogicVRegister Simulator::FNP(VectorFormat vform,                    \
                                LogicVRegister dst,                    \
                                const LogicVRegister& src1,            \
                                const LogicVRegister& src2) {          \
    SimVRegister temp1, temp2;                                         \
    uzp1(vform, temp1, src1, src2);                                    \
    uzp2(vform, temp2, src1, src2);                                    \
    FN(vform, dst, temp1, temp2);                                      \
    return dst;                                                        \
  }                                                                    \
                                                                       \
  LogicVRegister Simulator::FNP(VectorFormat vform,                    \
                                LogicVRegister dst,                    \
                                const LogicVRegister& src) {           \
    if (vform == kFormatS) {                                           \
      float result = OP(src.Float<float>(0), src.Float<float>(1));     \
      dst.SetFloat(0, result);                                         \
    } else {                                                           \
      VIXL_ASSERT(vform == kFormatD);                                  \
      double result = OP(src.Float<double>(0), src.Float<double>(1));  \
      dst.SetFloat(0, result);                                         \
    }                                                                  \
    dst.ClearForWrite(vform);                                          \
    return dst;                                                        \
  }
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
4227
4228
4229 LogicVRegister Simulator::fminmaxv(VectorFormat vform,
4230 LogicVRegister dst,
4231 const LogicVRegister& src,
4232 FPMinMaxOp Op) {
4233 VIXL_ASSERT(vform == kFormat4S);
4234 USE(vform);
4235 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
4236 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
4237 float result = (this->*Op)(result1, result2);
4238 dst.ClearForWrite(kFormatS);
4239 dst.SetFloat<float>(0, result);
4240 return dst;
4241 }
4242
4243
fmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4244 LogicVRegister Simulator::fmaxv(VectorFormat vform,
4245 LogicVRegister dst,
4246 const LogicVRegister& src) {
4247 return fminmaxv(vform, dst, src, &Simulator::FPMax);
4248 }
4249
4250
fminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4251 LogicVRegister Simulator::fminv(VectorFormat vform,
4252 LogicVRegister dst,
4253 const LogicVRegister& src) {
4254 return fminmaxv(vform, dst, src, &Simulator::FPMin);
4255 }
4256
4257
fmaxnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4258 LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
4259 LogicVRegister dst,
4260 const LogicVRegister& src) {
4261 return fminmaxv(vform, dst, src, &Simulator::FPMaxNM);
4262 }
4263
4264
fminnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4265 LogicVRegister Simulator::fminnmv(VectorFormat vform,
4266 LogicVRegister dst,
4267 const LogicVRegister& src) {
4268 return fminmaxv(vform, dst, src, &Simulator::FPMinNM);
4269 }
4270
4271
fmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4272 LogicVRegister Simulator::fmul(VectorFormat vform,
4273 LogicVRegister dst,
4274 const LogicVRegister& src1,
4275 const LogicVRegister& src2,
4276 int index) {
4277 dst.ClearForWrite(vform);
4278 SimVRegister temp;
4279 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4280 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4281 fmul<float>(vform, dst, src1, index_reg);
4282
4283 } else {
4284 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4285 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4286 fmul<double>(vform, dst, src1, index_reg);
4287 }
4288 return dst;
4289 }
4290
4291
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4292 LogicVRegister Simulator::fmla(VectorFormat vform,
4293 LogicVRegister dst,
4294 const LogicVRegister& src1,
4295 const LogicVRegister& src2,
4296 int index) {
4297 dst.ClearForWrite(vform);
4298 SimVRegister temp;
4299 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4300 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4301 fmla<float>(vform, dst, src1, index_reg);
4302
4303 } else {
4304 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4305 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4306 fmla<double>(vform, dst, src1, index_reg);
4307 }
4308 return dst;
4309 }
4310
4311
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4312 LogicVRegister Simulator::fmls(VectorFormat vform,
4313 LogicVRegister dst,
4314 const LogicVRegister& src1,
4315 const LogicVRegister& src2,
4316 int index) {
4317 dst.ClearForWrite(vform);
4318 SimVRegister temp;
4319 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4320 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4321 fmls<float>(vform, dst, src1, index_reg);
4322
4323 } else {
4324 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4325 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4326 fmls<double>(vform, dst, src1, index_reg);
4327 }
4328 return dst;
4329 }
4330
4331
fmulx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4332 LogicVRegister Simulator::fmulx(VectorFormat vform,
4333 LogicVRegister dst,
4334 const LogicVRegister& src1,
4335 const LogicVRegister& src2,
4336 int index) {
4337 dst.ClearForWrite(vform);
4338 SimVRegister temp;
4339 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4340 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4341 fmulx<float>(vform, dst, src1, index_reg);
4342
4343 } else {
4344 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4345 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4346 fmulx<double>(vform, dst, src1, index_reg);
4347 }
4348 return dst;
4349 }
4350
4351
frint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,bool inexact_exception)4352 LogicVRegister Simulator::frint(VectorFormat vform,
4353 LogicVRegister dst,
4354 const LogicVRegister& src,
4355 FPRounding rounding_mode,
4356 bool inexact_exception) {
4357 dst.ClearForWrite(vform);
4358 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4359 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4360 float input = src.Float<float>(i);
4361 float rounded = FPRoundInt(input, rounding_mode);
4362 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
4363 FPProcessException();
4364 }
4365 dst.SetFloat<float>(i, rounded);
4366 }
4367 } else {
4368 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4369 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4370 double input = src.Float<double>(i);
4371 double rounded = FPRoundInt(input, rounding_mode);
4372 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
4373 FPProcessException();
4374 }
4375 dst.SetFloat<double>(i, rounded);
4376 }
4377 }
4378 return dst;
4379 }
4380
4381
fcvts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,int fbits)4382 LogicVRegister Simulator::fcvts(VectorFormat vform,
4383 LogicVRegister dst,
4384 const LogicVRegister& src,
4385 FPRounding rounding_mode,
4386 int fbits) {
4387 dst.ClearForWrite(vform);
4388 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4389 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4390 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
4391 dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
4392 }
4393 } else {
4394 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4395 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4396 double op = src.Float<double>(i) * std::pow(2.0, fbits);
4397 dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
4398 }
4399 }
4400 return dst;
4401 }
4402
4403
fcvtu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,int fbits)4404 LogicVRegister Simulator::fcvtu(VectorFormat vform,
4405 LogicVRegister dst,
4406 const LogicVRegister& src,
4407 FPRounding rounding_mode,
4408 int fbits) {
4409 dst.ClearForWrite(vform);
4410 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4411 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4412 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
4413 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
4414 }
4415 } else {
4416 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4417 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4418 double op = src.Float<double>(i) * std::pow(2.0, fbits);
4419 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
4420 }
4421 }
4422 return dst;
4423 }
4424
4425
fcvtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4426 LogicVRegister Simulator::fcvtl(VectorFormat vform,
4427 LogicVRegister dst,
4428 const LogicVRegister& src) {
4429 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4430 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
4431 dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
4432 }
4433 } else {
4434 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4435 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
4436 dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
4437 }
4438 }
4439 return dst;
4440 }
4441
4442
fcvtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4443 LogicVRegister Simulator::fcvtl2(VectorFormat vform,
4444 LogicVRegister dst,
4445 const LogicVRegister& src) {
4446 int lane_count = LaneCountFromFormat(vform);
4447 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4448 for (int i = 0; i < lane_count; i++) {
4449 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
4450 }
4451 } else {
4452 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4453 for (int i = 0; i < lane_count; i++) {
4454 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
4455 }
4456 }
4457 return dst;
4458 }
4459
4460
fcvtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4461 LogicVRegister Simulator::fcvtn(VectorFormat vform,
4462 LogicVRegister dst,
4463 const LogicVRegister& src) {
4464 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4465 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4466 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
4467 }
4468 } else {
4469 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4470 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4471 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
4472 }
4473 }
4474 return dst;
4475 }
4476
4477
fcvtn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4478 LogicVRegister Simulator::fcvtn2(VectorFormat vform,
4479 LogicVRegister dst,
4480 const LogicVRegister& src) {
4481 int lane_count = LaneCountFromFormat(vform) / 2;
4482 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4483 for (int i = lane_count - 1; i >= 0; i--) {
4484 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
4485 }
4486 } else {
4487 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4488 for (int i = lane_count - 1; i >= 0; i--) {
4489 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
4490 }
4491 }
4492 return dst;
4493 }
4494
4495
fcvtxn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4496 LogicVRegister Simulator::fcvtxn(VectorFormat vform,
4497 LogicVRegister dst,
4498 const LogicVRegister& src) {
4499 dst.ClearForWrite(vform);
4500 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4501 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4502 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
4503 }
4504 return dst;
4505 }
4506
4507
fcvtxn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4508 LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
4509 LogicVRegister dst,
4510 const LogicVRegister& src) {
4511 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4512 int lane_count = LaneCountFromFormat(vform) / 2;
4513 for (int i = lane_count - 1; i >= 0; i--) {
4514 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
4515 }
4516 return dst;
4517 }
4518
4519
4520 // Based on reference C function recip_sqrt_estimate from ARM ARM.
recip_sqrt_estimate(double a)4521 double Simulator::recip_sqrt_estimate(double a) {
4522 int q0, q1, s;
4523 double r;
4524 if (a < 0.5) {
4525 q0 = static_cast<int>(a * 512.0);
4526 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
4527 } else {
4528 q1 = static_cast<int>(a * 256.0);
4529 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
4530 }
4531 s = static_cast<int>(256.0 * r + 0.5);
4532 return static_cast<double>(s) / 256.0;
4533 }
4534
4535
Bits(uint64_t val,int start_bit,int end_bit)4536 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
4537 return unsigned_bitextract_64(start_bit, end_bit, val);
4538 }
4539
4540
4541 template <typename T>
FPRecipSqrtEstimate(T op)4542 T Simulator::FPRecipSqrtEstimate(T op) {
4543 if (std::isnan(op)) {
4544 return FPProcessNaN(op);
4545 } else if (op == 0.0) {
4546 if (copysign(1.0, op) < 0.0) {
4547 return kFP64NegativeInfinity;
4548 } else {
4549 return kFP64PositiveInfinity;
4550 }
4551 } else if (copysign(1.0, op) < 0.0) {
4552 FPProcessException();
4553 return FPDefaultNaN<T>();
4554 } else if (std::isinf(op)) {
4555 return 0.0;
4556 } else {
4557 uint64_t fraction;
4558 int exp, result_exp;
4559
4560 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4561 exp = float_exp(op);
4562 fraction = float_mantissa(op);
4563 fraction <<= 29;
4564 } else {
4565 exp = double_exp(op);
4566 fraction = double_mantissa(op);
4567 }
4568
4569 if (exp == 0) {
4570 while (Bits(fraction, 51, 51) == 0) {
4571 fraction = Bits(fraction, 50, 0) << 1;
4572 exp -= 1;
4573 }
4574 fraction = Bits(fraction, 50, 0) << 1;
4575 }
4576
4577 double scaled;
4578 if (Bits(exp, 0, 0) == 0) {
4579 scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
4580 } else {
4581 scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44);
4582 }
4583
4584 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4585 result_exp = (380 - exp) / 2;
4586 } else {
4587 result_exp = (3068 - exp) / 2;
4588 }
4589
4590 double estimate = recip_sqrt_estimate(scaled);
4591
4592 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4593 return float_pack(0, Bits(result_exp, 7, 0),
4594 Bits(double_to_rawbits(estimate), 51, 29));
4595 } else {
4596 return double_pack(0, Bits(result_exp, 10, 0),
4597 Bits(double_to_rawbits(estimate), 51, 0));
4598 }
4599 }
4600 }
4601
4602
frsqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4603 LogicVRegister Simulator::frsqrte(VectorFormat vform,
4604 LogicVRegister dst,
4605 const LogicVRegister& src) {
4606 dst.ClearForWrite(vform);
4607 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4608 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4609 float input = src.Float<float>(i);
4610 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
4611 }
4612 } else {
4613 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4614 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4615 double input = src.Float<double>(i);
4616 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
4617 }
4618 }
4619 return dst;
4620 }
4621
4622 template <typename T>
FPRecipEstimate(T op,FPRounding rounding)4623 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
4624 uint32_t sign;
4625
4626 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4627 sign = float_sign(op);
4628 } else {
4629 sign = double_sign(op);
4630 }
4631
4632 if (std::isnan(op)) {
4633 return FPProcessNaN(op);
4634 } else if (std::isinf(op)) {
4635 return (sign == 1) ? -0.0 : 0.0;
4636 } else if (op == 0.0) {
4637 FPProcessException(); // FPExc_DivideByZero exception.
4638 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4639 } else if (((sizeof(T) == sizeof(float)) && // NOLINT(runtime/sizeof)
4640 (std::fabs(op) < std::pow(2.0, -128.0))) ||
4641 ((sizeof(T) == sizeof(double)) && // NOLINT(runtime/sizeof)
4642 (std::fabs(op) < std::pow(2.0, -1024.0)))) {
4643 bool overflow_to_inf = false;
4644 switch (rounding) {
4645 case FPTieEven: overflow_to_inf = true; break;
4646 case FPPositiveInfinity: overflow_to_inf = (sign == 0); break;
4647 case FPNegativeInfinity: overflow_to_inf = (sign == 1); break;
4648 case FPZero: overflow_to_inf = false; break;
4649 default: break;
4650 }
4651 FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
4652 if (overflow_to_inf) {
4653 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4654 } else {
4655 // Return FPMaxNormal(sign).
4656 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4657 return float_pack(sign, 0xfe, 0x07fffff);
4658 } else {
4659 return double_pack(sign, 0x7fe, 0x0fffffffffffffl);
4660 }
4661 }
4662 } else {
4663 uint64_t fraction;
4664 int exp, result_exp;
4665 uint32_t sign;
4666
4667 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4668 sign = float_sign(op);
4669 exp = float_exp(op);
4670 fraction = float_mantissa(op);
4671 fraction <<= 29;
4672 } else {
4673 sign = double_sign(op);
4674 exp = double_exp(op);
4675 fraction = double_mantissa(op);
4676 }
4677
4678 if (exp == 0) {
4679 if (Bits(fraction, 51, 51) == 0) {
4680 exp -= 1;
4681 fraction = Bits(fraction, 49, 0) << 2;
4682 } else {
4683 fraction = Bits(fraction, 50, 0) << 1;
4684 }
4685 }
4686
4687 double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
4688
4689 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4690 result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254.
4691 } else {
4692 result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046.
4693 }
4694
4695 double estimate = recip_estimate(scaled);
4696
4697 fraction = double_mantissa(estimate);
4698 if (result_exp == 0) {
4699 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
4700 } else if (result_exp == -1) {
4701 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
4702 result_exp = 0;
4703 }
4704 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4705 return float_pack(sign, Bits(result_exp, 7, 0), Bits(fraction, 51, 29));
4706 } else {
4707 return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
4708 }
4709 }
4710 }
4711
4712
frecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round)4713 LogicVRegister Simulator::frecpe(VectorFormat vform,
4714 LogicVRegister dst,
4715 const LogicVRegister& src,
4716 FPRounding round) {
4717 dst.ClearForWrite(vform);
4718 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4719 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4720 float input = src.Float<float>(i);
4721 dst.SetFloat(i, FPRecipEstimate<float>(input, round));
4722 }
4723 } else {
4724 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4725 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4726 double input = src.Float<double>(i);
4727 dst.SetFloat(i, FPRecipEstimate<double>(input, round));
4728 }
4729 }
4730 return dst;
4731 }
4732
4733
ursqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4734 LogicVRegister Simulator::ursqrte(VectorFormat vform,
4735 LogicVRegister dst,
4736 const LogicVRegister& src) {
4737 dst.ClearForWrite(vform);
4738 uint32_t operand, result;
4739 double dp_operand, dp_result;
4740 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4741 operand = src.Uint(vform, i);
4742 if (operand <= 0x3FFFFFFF) {
4743 result = 0xFFFFFFFF;
4744 } else {
4745 dp_operand = operand * std::pow(2.0, -32);
4746 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
4747 result = static_cast<uint32_t>(dp_result);
4748 }
4749 dst.SetUint(vform, i, result);
4750 }
4751 return dst;
4752 }
4753
4754
4755 // Based on reference C function recip_estimate from ARM ARM.
recip_estimate(double a)4756 double Simulator::recip_estimate(double a) {
4757 int q, s;
4758 double r;
4759 q = static_cast<int>(a * 512.0);
4760 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
4761 s = static_cast<int>(256.0 * r + 0.5);
4762 return static_cast<double>(s) / 256.0;
4763 }
4764
4765
urecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4766 LogicVRegister Simulator::urecpe(VectorFormat vform,
4767 LogicVRegister dst,
4768 const LogicVRegister& src) {
4769 dst.ClearForWrite(vform);
4770 uint32_t operand, result;
4771 double dp_operand, dp_result;
4772 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4773 operand = src.Uint(vform, i);
4774 if (operand <= 0x7FFFFFFF) {
4775 result = 0xFFFFFFFF;
4776 } else {
4777 dp_operand = operand * std::pow(2.0, -32);
4778 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
4779 result = static_cast<uint32_t>(dp_result);
4780 }
4781 dst.SetUint(vform, i, result);
4782 }
4783 return dst;
4784 }
4785
4786 template <typename T>
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4787 LogicVRegister Simulator::frecpx(VectorFormat vform,
4788 LogicVRegister dst,
4789 const LogicVRegister& src) {
4790 dst.ClearForWrite(vform);
4791 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4792 T op = src.Float<T>(i);
4793 T result;
4794 if (std::isnan(op)) {
4795 result = FPProcessNaN(op);
4796 } else {
4797 int exp;
4798 uint32_t sign;
4799 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4800 sign = float_sign(op);
4801 exp = float_exp(op);
4802 exp = (exp == 0) ? (0xFF - 1) : Bits(~exp, 7, 0);
4803 result = float_pack(sign, exp, 0);
4804 } else {
4805 sign = double_sign(op);
4806 exp = double_exp(op);
4807 exp = (exp == 0) ? (0x7FF - 1) : Bits(~exp, 10, 0);
4808 result = double_pack(sign, exp, 0);
4809 }
4810 }
4811 dst.SetFloat(i, result);
4812 }
4813 return dst;
4814 }
4815
4816
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4817 LogicVRegister Simulator::frecpx(VectorFormat vform,
4818 LogicVRegister dst,
4819 const LogicVRegister& src) {
4820 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4821 frecpx<float>(vform, dst, src);
4822 } else {
4823 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4824 frecpx<double>(vform, dst, src);
4825 }
4826 return dst;
4827 }
4828
scvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)4829 LogicVRegister Simulator::scvtf(VectorFormat vform,
4830 LogicVRegister dst,
4831 const LogicVRegister& src,
4832 int fbits,
4833 FPRounding round) {
4834 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4835 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4836 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
4837 dst.SetFloat<float>(i, result);
4838 } else {
4839 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4840 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
4841 dst.SetFloat<double>(i, result);
4842 }
4843 }
4844 return dst;
4845 }
4846
4847
ucvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)4848 LogicVRegister Simulator::ucvtf(VectorFormat vform,
4849 LogicVRegister dst,
4850 const LogicVRegister& src,
4851 int fbits,
4852 FPRounding round) {
4853 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4854 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4855 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
4856 dst.SetFloat<float>(i, result);
4857 } else {
4858 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4859 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
4860 dst.SetFloat<double>(i, result);
4861 }
4862 }
4863 return dst;
4864 }
4865
4866
4867 } // namespace vixl
4868