1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #if V8_TARGET_ARCH_ARM64
6 
7 #include <cmath>
8 #include "src/arm64/simulator-arm64.h"
9 
10 namespace v8 {
11 namespace internal {
12 
13 #if defined(USE_SIMULATOR)
14 
15 namespace {
16 
17 // See FPRound for a description of this function.
FPRoundToDouble(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)18 inline double FPRoundToDouble(int64_t sign, int64_t exponent, uint64_t mantissa,
19                               FPRounding round_mode) {
20   uint64_t bits = FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(
21       sign, exponent, mantissa, round_mode);
22   return bit_cast<double>(bits);
23 }
24 
25 // See FPRound for a description of this function.
FPRoundToFloat(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)26 inline float FPRoundToFloat(int64_t sign, int64_t exponent, uint64_t mantissa,
27                             FPRounding round_mode) {
28   uint32_t bits = FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(
29       sign, exponent, mantissa, round_mode);
30   return bit_cast<float>(bits);
31 }
32 
33 // See FPRound for a description of this function.
FPRoundToFloat16(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)34 inline float16 FPRoundToFloat16(int64_t sign, int64_t exponent,
35                                 uint64_t mantissa, FPRounding round_mode) {
36   return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
37       sign, exponent, mantissa, round_mode);
38 }
39 
40 }  // namespace
41 
FixedToDouble(int64_t src,int fbits,FPRounding round)42 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
43   if (src >= 0) {
44     return UFixedToDouble(src, fbits, round);
45   } else if (src == INT64_MIN) {
46     return -UFixedToDouble(src, fbits, round);
47   } else {
48     return -UFixedToDouble(-src, fbits, round);
49   }
50 }
51 
UFixedToDouble(uint64_t src,int fbits,FPRounding round)52 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
53   // An input of 0 is a special case because the result is effectively
54   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
55   if (src == 0) {
56     return 0.0;
57   }
58 
59   // Calculate the exponent. The highest significant bit will have the value
60   // 2^exponent.
61   const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
62   const int64_t exponent = highest_significant_bit - fbits;
63 
64   return FPRoundToDouble(0, exponent, src, round);
65 }
66 
FixedToFloat(int64_t src,int fbits,FPRounding round)67 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
68   if (src >= 0) {
69     return UFixedToFloat(src, fbits, round);
70   } else if (src == INT64_MIN) {
71     return -UFixedToFloat(src, fbits, round);
72   } else {
73     return -UFixedToFloat(-src, fbits, round);
74   }
75 }
76 
UFixedToFloat(uint64_t src,int fbits,FPRounding round)77 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
78   // An input of 0 is a special case because the result is effectively
79   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
80   if (src == 0) {
81     return 0.0f;
82   }
83 
84   // Calculate the exponent. The highest significant bit will have the value
85   // 2^exponent.
86   const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
87   const int32_t exponent = highest_significant_bit - fbits;
88 
89   return FPRoundToFloat(0, exponent, src, round);
90 }
91 
FPToDouble(float value)92 double Simulator::FPToDouble(float value) {
93   switch (std::fpclassify(value)) {
94     case FP_NAN: {
95       if (IsSignallingNaN(value)) {
96         FPProcessException();
97       }
98       if (DN()) return kFP64DefaultNaN;
99 
100       // Convert NaNs as the processor would:
101       //  - The sign is propagated.
102       //  - The mantissa is transferred entirely, except that the top bit is
103       //    forced to '1', making the result a quiet NaN. The unused (low-order)
104       //    mantissa bits are set to 0.
105       uint32_t raw = bit_cast<uint32_t>(value);
106 
107       uint64_t sign = raw >> 31;
108       uint64_t exponent = (1 << kDoubleExponentBits) - 1;
109       uint64_t mantissa = unsigned_bitextract_64(21, 0, raw);
110 
111       // Unused low-order bits remain zero.
112       mantissa <<= (kDoubleMantissaBits - kFloatMantissaBits);
113 
114       // Force a quiet NaN.
115       mantissa |= (UINT64_C(1) << (kDoubleMantissaBits - 1));
116 
117       return double_pack(sign, exponent, mantissa);
118     }
119 
120     case FP_ZERO:
121     case FP_NORMAL:
122     case FP_SUBNORMAL:
123     case FP_INFINITE: {
124       // All other inputs are preserved in a standard cast, because every value
125       // representable using an IEEE-754 float is also representable using an
126       // IEEE-754 double.
127       return static_cast<double>(value);
128     }
129   }
130 
131   UNREACHABLE();
132 }
133 
FPToFloat(float16 value)134 float Simulator::FPToFloat(float16 value) {
135   uint32_t sign = value >> 15;
136   uint32_t exponent =
137       unsigned_bitextract_32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
138                              kFloat16MantissaBits, value);
139   uint32_t mantissa =
140       unsigned_bitextract_32(kFloat16MantissaBits - 1, 0, value);
141 
142   switch (float16classify(value)) {
143     case FP_ZERO:
144       return (sign == 0) ? 0.0f : -0.0f;
145 
146     case FP_INFINITE:
147       return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
148 
149     case FP_SUBNORMAL: {
150       // Calculate shift required to put mantissa into the most-significant bits
151       // of the destination mantissa.
152       int shift = CountLeadingZeros(mantissa << (32 - 10), 32);
153 
154       // Shift mantissa and discard implicit '1'.
155       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
156       mantissa &= (1 << kFloatMantissaBits) - 1;
157 
158       // Adjust the exponent for the shift applied, and rebias.
159       exponent = exponent - shift + (kFloatExponentBias - kFloat16ExponentBias);
160       break;
161     }
162 
163     case FP_NAN: {
164       if (IsSignallingNaN(value)) {
165         FPProcessException();
166       }
167       if (DN()) return kFP32DefaultNaN;
168 
169       // Convert NaNs as the processor would:
170       //  - The sign is propagated.
171       //  - The mantissa is transferred entirely, except that the top bit is
172       //    forced to '1', making the result a quiet NaN. The unused (low-order)
173       //    mantissa bits are set to 0.
174       exponent = (1 << kFloatExponentBits) - 1;
175 
176       // Increase bits in mantissa, making low-order bits 0.
177       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
178       mantissa |= 1 << (kFloatMantissaBits - 1);  // Force a quiet NaN.
179       break;
180     }
181 
182     case FP_NORMAL: {
183       // Increase bits in mantissa, making low-order bits 0.
184       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
185 
186       // Change exponent bias.
187       exponent += (kFloatExponentBias - kFloat16ExponentBias);
188       break;
189     }
190 
191     default:
192       UNREACHABLE();
193   }
194   return float_pack(sign, exponent, mantissa);
195 }
196 
FPToFloat16(float value,FPRounding round_mode)197 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
198   // Only the FPTieEven rounding mode is implemented.
199   DCHECK_EQ(round_mode, FPTieEven);
200   USE(round_mode);
201 
202   int64_t sign = float_sign(value);
203   int64_t exponent =
204       static_cast<int64_t>(float_exp(value)) - kFloatExponentBias;
205   uint32_t mantissa = float_mantissa(value);
206 
207   switch (std::fpclassify(value)) {
208     case FP_NAN: {
209       if (IsSignallingNaN(value)) {
210         FPProcessException();
211       }
212       if (DN()) return kFP16DefaultNaN;
213 
214       // Convert NaNs as the processor would:
215       //  - The sign is propagated.
216       //  - The mantissa is transferred as much as possible, except that the top
217       //    bit is forced to '1', making the result a quiet NaN.
218       float16 result =
219           (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
220       result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
221       result |= (1 << (kFloat16MantissaBits - 1));  // Force a quiet NaN;
222       return result;
223     }
224 
225     case FP_ZERO:
226       return (sign == 0) ? 0 : 0x8000;
227 
228     case FP_INFINITE:
229       return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
230 
231     case FP_NORMAL:
232     case FP_SUBNORMAL: {
233       // Convert float-to-half as the processor would, assuming that FPCR.FZ
234       // (flush-to-zero) is not set.
235 
236       // Add the implicit '1' bit to the mantissa.
237       mantissa += (1 << kFloatMantissaBits);
238       return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
239     }
240   }
241 
242   UNREACHABLE();
243 }
244 
FPToFloat16(double value,FPRounding round_mode)245 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
246   // Only the FPTieEven rounding mode is implemented.
247   DCHECK_EQ(round_mode, FPTieEven);
248   USE(round_mode);
249 
250   int64_t sign = double_sign(value);
251   int64_t exponent =
252       static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
253   uint64_t mantissa = double_mantissa(value);
254 
255   switch (std::fpclassify(value)) {
256     case FP_NAN: {
257       if (IsSignallingNaN(value)) {
258         FPProcessException();
259       }
260       if (DN()) return kFP16DefaultNaN;
261 
262       // Convert NaNs as the processor would:
263       //  - The sign is propagated.
264       //  - The mantissa is transferred as much as possible, except that the top
265       //    bit is forced to '1', making the result a quiet NaN.
266       float16 result =
267           (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
268       result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
269       result |= (1 << (kFloat16MantissaBits - 1));  // Force a quiet NaN;
270       return result;
271     }
272 
273     case FP_ZERO:
274       return (sign == 0) ? 0 : 0x8000;
275 
276     case FP_INFINITE:
277       return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
278 
279     case FP_NORMAL:
280     case FP_SUBNORMAL: {
281       // Convert double-to-half as the processor would, assuming that FPCR.FZ
282       // (flush-to-zero) is not set.
283 
284       // Add the implicit '1' bit to the mantissa.
285       mantissa += (UINT64_C(1) << kDoubleMantissaBits);
286       return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
287     }
288   }
289 
290   UNREACHABLE();
291 }
292 
FPToFloat(double value,FPRounding round_mode)293 float Simulator::FPToFloat(double value, FPRounding round_mode) {
294   // Only the FPTieEven rounding mode is implemented.
295   DCHECK((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
296   USE(round_mode);
297 
298   switch (std::fpclassify(value)) {
299     case FP_NAN: {
300       if (IsSignallingNaN(value)) {
301         FPProcessException();
302       }
303       if (DN()) return kFP32DefaultNaN;
304 
305       // Convert NaNs as the processor would:
306       //  - The sign is propagated.
307       //  - The mantissa is transferred as much as possible, except that the
308       //    top bit is forced to '1', making the result a quiet NaN.
309 
310       uint64_t raw = bit_cast<uint64_t>(value);
311 
312       uint32_t sign = raw >> 63;
313       uint32_t exponent = (1 << 8) - 1;
314       uint32_t mantissa = static_cast<uint32_t>(unsigned_bitextract_64(
315           50, kDoubleMantissaBits - kFloatMantissaBits, raw));
316       mantissa |= (1 << (kFloatMantissaBits - 1));  // Force a quiet NaN.
317 
318       return float_pack(sign, exponent, mantissa);
319     }
320 
321     case FP_ZERO:
322     case FP_INFINITE: {
323       // In a C++ cast, any value representable in the target type will be
324       // unchanged. This is always the case for +/-0.0 and infinities.
325       return static_cast<float>(value);
326     }
327 
328     case FP_NORMAL:
329     case FP_SUBNORMAL: {
330       // Convert double-to-float as the processor would, assuming that FPCR.FZ
331       // (flush-to-zero) is not set.
332       uint32_t sign = double_sign(value);
333       int64_t exponent =
334           static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
335       uint64_t mantissa = double_mantissa(value);
336       if (std::fpclassify(value) == FP_NORMAL) {
337         // For normal FP values, add the hidden bit.
338         mantissa |= (UINT64_C(1) << kDoubleMantissaBits);
339       }
340       return FPRoundToFloat(sign, exponent, mantissa, round_mode);
341     }
342   }
343 
344   UNREACHABLE();
345 }
346 
ld1(VectorFormat vform,LogicVRegister dst,uint64_t addr)347 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
348   dst.ClearForWrite(vform);
349   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
350     dst.ReadUintFromMem(vform, i, addr);
351     addr += LaneSizeInBytesFromFormat(vform);
352   }
353 }
354 
ld1(VectorFormat vform,LogicVRegister dst,int index,uint64_t addr)355 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, int index,
356                     uint64_t addr) {
357   dst.ReadUintFromMem(vform, index, addr);
358 }
359 
ld1r(VectorFormat vform,LogicVRegister dst,uint64_t addr)360 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
361   dst.ClearForWrite(vform);
362   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
363     dst.ReadUintFromMem(vform, i, addr);
364   }
365 }
366 
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr1)367 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
368                     LogicVRegister dst2, uint64_t addr1) {
369   dst1.ClearForWrite(vform);
370   dst2.ClearForWrite(vform);
371   int esize = LaneSizeInBytesFromFormat(vform);
372   uint64_t addr2 = addr1 + esize;
373   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
374     dst1.ReadUintFromMem(vform, i, addr1);
375     dst2.ReadUintFromMem(vform, i, addr2);
376     addr1 += 2 * esize;
377     addr2 += 2 * esize;
378   }
379 }
380 
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,int index,uint64_t addr1)381 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
382                     LogicVRegister dst2, int index, uint64_t addr1) {
383   dst1.ClearForWrite(vform);
384   dst2.ClearForWrite(vform);
385   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
386   dst1.ReadUintFromMem(vform, index, addr1);
387   dst2.ReadUintFromMem(vform, index, addr2);
388 }
389 
ld2r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr)390 void Simulator::ld2r(VectorFormat vform, LogicVRegister dst1,
391                      LogicVRegister dst2, uint64_t addr) {
392   dst1.ClearForWrite(vform);
393   dst2.ClearForWrite(vform);
394   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
395   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
396     dst1.ReadUintFromMem(vform, i, addr);
397     dst2.ReadUintFromMem(vform, i, addr2);
398   }
399 }
400 
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr1)401 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
402                     LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1) {
403   dst1.ClearForWrite(vform);
404   dst2.ClearForWrite(vform);
405   dst3.ClearForWrite(vform);
406   int esize = LaneSizeInBytesFromFormat(vform);
407   uint64_t addr2 = addr1 + esize;
408   uint64_t addr3 = addr2 + esize;
409   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
410     dst1.ReadUintFromMem(vform, i, addr1);
411     dst2.ReadUintFromMem(vform, i, addr2);
412     dst3.ReadUintFromMem(vform, i, addr3);
413     addr1 += 3 * esize;
414     addr2 += 3 * esize;
415     addr3 += 3 * esize;
416   }
417 }
418 
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr1)419 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
420                     LogicVRegister dst2, LogicVRegister dst3, int index,
421                     uint64_t addr1) {
422   dst1.ClearForWrite(vform);
423   dst2.ClearForWrite(vform);
424   dst3.ClearForWrite(vform);
425   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
426   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
427   dst1.ReadUintFromMem(vform, index, addr1);
428   dst2.ReadUintFromMem(vform, index, addr2);
429   dst3.ReadUintFromMem(vform, index, addr3);
430 }
431 
ld3r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)432 void Simulator::ld3r(VectorFormat vform, LogicVRegister dst1,
433                      LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) {
434   dst1.ClearForWrite(vform);
435   dst2.ClearForWrite(vform);
436   dst3.ClearForWrite(vform);
437   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
438   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
439   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
440     dst1.ReadUintFromMem(vform, i, addr);
441     dst2.ReadUintFromMem(vform, i, addr2);
442     dst3.ReadUintFromMem(vform, i, addr3);
443   }
444 }
445 
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr1)446 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
447                     LogicVRegister dst2, LogicVRegister dst3,
448                     LogicVRegister dst4, uint64_t addr1) {
449   dst1.ClearForWrite(vform);
450   dst2.ClearForWrite(vform);
451   dst3.ClearForWrite(vform);
452   dst4.ClearForWrite(vform);
453   int esize = LaneSizeInBytesFromFormat(vform);
454   uint64_t addr2 = addr1 + esize;
455   uint64_t addr3 = addr2 + esize;
456   uint64_t addr4 = addr3 + esize;
457   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
458     dst1.ReadUintFromMem(vform, i, addr1);
459     dst2.ReadUintFromMem(vform, i, addr2);
460     dst3.ReadUintFromMem(vform, i, addr3);
461     dst4.ReadUintFromMem(vform, i, addr4);
462     addr1 += 4 * esize;
463     addr2 += 4 * esize;
464     addr3 += 4 * esize;
465     addr4 += 4 * esize;
466   }
467 }
468 
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr1)469 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
470                     LogicVRegister dst2, LogicVRegister dst3,
471                     LogicVRegister dst4, int index, uint64_t addr1) {
472   dst1.ClearForWrite(vform);
473   dst2.ClearForWrite(vform);
474   dst3.ClearForWrite(vform);
475   dst4.ClearForWrite(vform);
476   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
477   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
478   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
479   dst1.ReadUintFromMem(vform, index, addr1);
480   dst2.ReadUintFromMem(vform, index, addr2);
481   dst3.ReadUintFromMem(vform, index, addr3);
482   dst4.ReadUintFromMem(vform, index, addr4);
483 }
484 
ld4r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)485 void Simulator::ld4r(VectorFormat vform, LogicVRegister dst1,
486                      LogicVRegister dst2, LogicVRegister dst3,
487                      LogicVRegister dst4, uint64_t addr) {
488   dst1.ClearForWrite(vform);
489   dst2.ClearForWrite(vform);
490   dst3.ClearForWrite(vform);
491   dst4.ClearForWrite(vform);
492   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
493   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
494   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
495   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
496     dst1.ReadUintFromMem(vform, i, addr);
497     dst2.ReadUintFromMem(vform, i, addr2);
498     dst3.ReadUintFromMem(vform, i, addr3);
499     dst4.ReadUintFromMem(vform, i, addr4);
500   }
501 }
502 
st1(VectorFormat vform,LogicVRegister src,uint64_t addr)503 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
504   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
505     src.WriteUintToMem(vform, i, addr);
506     addr += LaneSizeInBytesFromFormat(vform);
507   }
508 }
509 
st1(VectorFormat vform,LogicVRegister src,int index,uint64_t addr)510 void Simulator::st1(VectorFormat vform, LogicVRegister src, int index,
511                     uint64_t addr) {
512   src.WriteUintToMem(vform, index, addr);
513 }
514 
st2(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,uint64_t addr)515 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
516                     uint64_t addr) {
517   int esize = LaneSizeInBytesFromFormat(vform);
518   uint64_t addr2 = addr + esize;
519   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
520     dst.WriteUintToMem(vform, i, addr);
521     dst2.WriteUintToMem(vform, i, addr2);
522     addr += 2 * esize;
523     addr2 += 2 * esize;
524   }
525 }
526 
st2(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,int index,uint64_t addr)527 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
528                     int index, uint64_t addr) {
529   int esize = LaneSizeInBytesFromFormat(vform);
530   dst.WriteUintToMem(vform, index, addr);
531   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
532 }
533 
st3(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)534 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
535                     LogicVRegister dst3, uint64_t addr) {
536   int esize = LaneSizeInBytesFromFormat(vform);
537   uint64_t addr2 = addr + esize;
538   uint64_t addr3 = addr2 + esize;
539   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
540     dst.WriteUintToMem(vform, i, addr);
541     dst2.WriteUintToMem(vform, i, addr2);
542     dst3.WriteUintToMem(vform, i, addr3);
543     addr += 3 * esize;
544     addr2 += 3 * esize;
545     addr3 += 3 * esize;
546   }
547 }
548 
st3(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr)549 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
550                     LogicVRegister dst3, int index, uint64_t addr) {
551   int esize = LaneSizeInBytesFromFormat(vform);
552   dst.WriteUintToMem(vform, index, addr);
553   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
554   dst3.WriteUintToMem(vform, index, addr + 2 * esize);
555 }
556 
st4(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)557 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
558                     LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) {
559   int esize = LaneSizeInBytesFromFormat(vform);
560   uint64_t addr2 = addr + esize;
561   uint64_t addr3 = addr2 + esize;
562   uint64_t addr4 = addr3 + esize;
563   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
564     dst.WriteUintToMem(vform, i, addr);
565     dst2.WriteUintToMem(vform, i, addr2);
566     dst3.WriteUintToMem(vform, i, addr3);
567     dst4.WriteUintToMem(vform, i, addr4);
568     addr += 4 * esize;
569     addr2 += 4 * esize;
570     addr3 += 4 * esize;
571     addr4 += 4 * esize;
572   }
573 }
574 
st4(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr)575 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
576                     LogicVRegister dst3, LogicVRegister dst4, int index,
577                     uint64_t addr) {
578   int esize = LaneSizeInBytesFromFormat(vform);
579   dst.WriteUintToMem(vform, index, addr);
580   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
581   dst3.WriteUintToMem(vform, index, addr + 2 * esize);
582   dst4.WriteUintToMem(vform, index, addr + 3 * esize);
583 }
584 
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)585 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
586                               const LogicVRegister& src1,
587                               const LogicVRegister& src2, Condition cond) {
588   dst.ClearForWrite(vform);
589   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
590     int64_t sa = src1.Int(vform, i);
591     int64_t sb = src2.Int(vform, i);
592     uint64_t ua = src1.Uint(vform, i);
593     uint64_t ub = src2.Uint(vform, i);
594     bool result = false;
595     switch (cond) {
596       case eq:
597         result = (ua == ub);
598         break;
599       case ge:
600         result = (sa >= sb);
601         break;
602       case gt:
603         result = (sa > sb);
604         break;
605       case hi:
606         result = (ua > ub);
607         break;
608       case hs:
609         result = (ua >= ub);
610         break;
611       case lt:
612         result = (sa < sb);
613         break;
614       case le:
615         result = (sa <= sb);
616         break;
617       default:
618         UNREACHABLE();
619     }
620     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
621   }
622   return dst;
623 }
624 
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int imm,Condition cond)625 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
626                               const LogicVRegister& src1, int imm,
627                               Condition cond) {
628   SimVRegister temp;
629   LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
630   return cmp(vform, dst, src1, imm_reg, cond);
631 }
632 
cmptst(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)633 LogicVRegister Simulator::cmptst(VectorFormat vform, LogicVRegister dst,
634                                  const LogicVRegister& src1,
635                                  const LogicVRegister& src2) {
636   dst.ClearForWrite(vform);
637   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
638     uint64_t ua = src1.Uint(vform, i);
639     uint64_t ub = src2.Uint(vform, i);
640     dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
641   }
642   return dst;
643 }
644 
add(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)645 LogicVRegister Simulator::add(VectorFormat vform, LogicVRegister dst,
646                               const LogicVRegister& src1,
647                               const LogicVRegister& src2) {
648   int lane_size = LaneSizeInBitsFromFormat(vform);
649   dst.ClearForWrite(vform);
650   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
651     // Test for unsigned saturation.
652     uint64_t ua = src1.UintLeftJustified(vform, i);
653     uint64_t ub = src2.UintLeftJustified(vform, i);
654     uint64_t ur = ua + ub;
655     if (ur < ua) {
656       dst.SetUnsignedSat(i, true);
657     }
658 
659     // Test for signed saturation.
660     bool pos_a = (ua >> 63) == 0;
661     bool pos_b = (ub >> 63) == 0;
662     bool pos_r = (ur >> 63) == 0;
663     // If the signs of the operands are the same, but different from the result,
664     // there was an overflow.
665     if ((pos_a == pos_b) && (pos_a != pos_r)) {
666       dst.SetSignedSat(i, pos_a);
667     }
668 
669     dst.SetInt(vform, i, ur >> (64 - lane_size));
670   }
671   return dst;
672 }
673 
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)674 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
675                                const LogicVRegister& src1,
676                                const LogicVRegister& src2) {
677   SimVRegister temp1, temp2;
678   uzp1(vform, temp1, src1, src2);
679   uzp2(vform, temp2, src1, src2);
680   add(vform, dst, temp1, temp2);
681   return dst;
682 }
683 
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)684 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
685                               const LogicVRegister& src1,
686                               const LogicVRegister& src2) {
687   SimVRegister temp;
688   mul(vform, temp, src1, src2);
689   add(vform, dst, dst, temp);
690   return dst;
691 }
692 
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)693 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
694                               const LogicVRegister& src1,
695                               const LogicVRegister& src2) {
696   SimVRegister temp;
697   mul(vform, temp, src1, src2);
698   sub(vform, dst, dst, temp);
699   return dst;
700 }
701 
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)702 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
703                               const LogicVRegister& src1,
704                               const LogicVRegister& src2) {
705   dst.ClearForWrite(vform);
706   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
707     dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
708   }
709   return dst;
710 }
711 
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)712 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
713                               const LogicVRegister& src1,
714                               const LogicVRegister& src2, int index) {
715   SimVRegister temp;
716   VectorFormat indexform = VectorFormatFillQ(vform);
717   return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
718 }
719 
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)720 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
721                               const LogicVRegister& src1,
722                               const LogicVRegister& src2, int index) {
723   SimVRegister temp;
724   VectorFormat indexform = VectorFormatFillQ(vform);
725   return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
726 }
727 
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)728 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
729                               const LogicVRegister& src1,
730                               const LogicVRegister& src2, int index) {
731   SimVRegister temp;
732   VectorFormat indexform = VectorFormatFillQ(vform);
733   return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
734 }
735 
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)736 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
737                                 const LogicVRegister& src1,
738                                 const LogicVRegister& src2, int index) {
739   SimVRegister temp;
740   VectorFormat indexform =
741       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
742   return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
743 }
744 
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)745 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
746                                  const LogicVRegister& src1,
747                                  const LogicVRegister& src2, int index) {
748   SimVRegister temp;
749   VectorFormat indexform =
750       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
751   return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
752 }
753 
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)754 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
755                                 const LogicVRegister& src1,
756                                 const LogicVRegister& src2, int index) {
757   SimVRegister temp;
758   VectorFormat indexform =
759       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
760   return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
761 }
762 
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)763 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
764                                  const LogicVRegister& src1,
765                                  const LogicVRegister& src2, int index) {
766   SimVRegister temp;
767   VectorFormat indexform =
768       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
769   return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
770 }
771 
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)772 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
773                                 const LogicVRegister& src1,
774                                 const LogicVRegister& src2, int index) {
775   SimVRegister temp;
776   VectorFormat indexform =
777       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
778   return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
779 }
780 
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)781 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
782                                  const LogicVRegister& src1,
783                                  const LogicVRegister& src2, int index) {
784   SimVRegister temp;
785   VectorFormat indexform =
786       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
787   return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
788 }
789 
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)790 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
791                                 const LogicVRegister& src1,
792                                 const LogicVRegister& src2, int index) {
793   SimVRegister temp;
794   VectorFormat indexform =
795       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
796   return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
797 }
798 
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)799 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
800                                  const LogicVRegister& src1,
801                                  const LogicVRegister& src2, int index) {
802   SimVRegister temp;
803   VectorFormat indexform =
804       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
805   return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
806 }
807 
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)808 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
809                                 const LogicVRegister& src1,
810                                 const LogicVRegister& src2, int index) {
811   SimVRegister temp;
812   VectorFormat indexform =
813       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
814   return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
815 }
816 
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)817 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
818                                  const LogicVRegister& src1,
819                                  const LogicVRegister& src2, int index) {
820   SimVRegister temp;
821   VectorFormat indexform =
822       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
823   return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
824 }
825 
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)826 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
827                                 const LogicVRegister& src1,
828                                 const LogicVRegister& src2, int index) {
829   SimVRegister temp;
830   VectorFormat indexform =
831       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
832   return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
833 }
834 
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)835 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
836                                  const LogicVRegister& src1,
837                                  const LogicVRegister& src2, int index) {
838   SimVRegister temp;
839   VectorFormat indexform =
840       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
841   return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
842 }
843 
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)844 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
845                                   const LogicVRegister& src1,
846                                   const LogicVRegister& src2, int index) {
847   SimVRegister temp;
848   VectorFormat indexform =
849       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
850   return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
851 }
852 
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)853 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
854                                    const LogicVRegister& src1,
855                                    const LogicVRegister& src2, int index) {
856   SimVRegister temp;
857   VectorFormat indexform =
858       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
859   return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
860 }
861 
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)862 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
863                                   const LogicVRegister& src1,
864                                   const LogicVRegister& src2, int index) {
865   SimVRegister temp;
866   VectorFormat indexform =
867       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
868   return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
869 }
870 
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)871 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
872                                    const LogicVRegister& src1,
873                                    const LogicVRegister& src2, int index) {
874   SimVRegister temp;
875   VectorFormat indexform =
876       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
877   return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
878 }
879 
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)880 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
881                                   const LogicVRegister& src1,
882                                   const LogicVRegister& src2, int index) {
883   SimVRegister temp;
884   VectorFormat indexform =
885       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
886   return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
887 }
888 
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)889 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
890                                    const LogicVRegister& src1,
891                                    const LogicVRegister& src2, int index) {
892   SimVRegister temp;
893   VectorFormat indexform =
894       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
895   return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
896 }
897 
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)898 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
899                                   const LogicVRegister& src1,
900                                   const LogicVRegister& src2, int index) {
901   SimVRegister temp;
902   VectorFormat indexform = VectorFormatFillQ(vform);
903   return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
904 }
905 
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)906 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
907                                    const LogicVRegister& src1,
908                                    const LogicVRegister& src2, int index) {
909   SimVRegister temp;
910   VectorFormat indexform = VectorFormatFillQ(vform);
911   return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
912 }
913 
PolynomialMult(uint8_t op1,uint8_t op2)914 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {
915   uint16_t result = 0;
916   uint16_t extended_op2 = op2;
917   for (int i = 0; i < 8; ++i) {
918     if ((op1 >> i) & 1) {
919       result = result ^ (extended_op2 << i);
920     }
921   }
922   return result;
923 }
924 
pmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)925 LogicVRegister Simulator::pmul(VectorFormat vform, LogicVRegister dst,
926                                const LogicVRegister& src1,
927                                const LogicVRegister& src2) {
928   dst.ClearForWrite(vform);
929   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
930     dst.SetUint(vform, i,
931                 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
932   }
933   return dst;
934 }
935 
pmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)936 LogicVRegister Simulator::pmull(VectorFormat vform, LogicVRegister dst,
937                                 const LogicVRegister& src1,
938                                 const LogicVRegister& src2) {
939   VectorFormat vform_src = VectorFormatHalfWidth(vform);
940   dst.ClearForWrite(vform);
941   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
942     dst.SetUint(
943         vform, i,
944         PolynomialMult(src1.Uint(vform_src, i), src2.Uint(vform_src, i)));
945   }
946   return dst;
947 }
948 
pmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)949 LogicVRegister Simulator::pmull2(VectorFormat vform, LogicVRegister dst,
950                                  const LogicVRegister& src1,
951                                  const LogicVRegister& src2) {
952   VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
953   dst.ClearForWrite(vform);
954   int lane_count = LaneCountFromFormat(vform);
955   for (int i = 0; i < lane_count; i++) {
956     dst.SetUint(vform, i,
957                 PolynomialMult(src1.Uint(vform_src, lane_count + i),
958                                src2.Uint(vform_src, lane_count + i)));
959   }
960   return dst;
961 }
962 
sub(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)963 LogicVRegister Simulator::sub(VectorFormat vform, LogicVRegister dst,
964                               const LogicVRegister& src1,
965                               const LogicVRegister& src2) {
966   int lane_size = LaneSizeInBitsFromFormat(vform);
967   dst.ClearForWrite(vform);
968   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
969     // Test for unsigned saturation.
970     uint64_t ua = src1.UintLeftJustified(vform, i);
971     uint64_t ub = src2.UintLeftJustified(vform, i);
972     uint64_t ur = ua - ub;
973     if (ub > ua) {
974       dst.SetUnsignedSat(i, false);
975     }
976 
977     // Test for signed saturation.
978     bool pos_a = (ua >> 63) == 0;
979     bool pos_b = (ub >> 63) == 0;
980     bool pos_r = (ur >> 63) == 0;
981     // If the signs of the operands are different, and the sign of the first
982     // operand doesn't match the result, there was an overflow.
983     if ((pos_a != pos_b) && (pos_a != pos_r)) {
984       dst.SetSignedSat(i, pos_a);
985     }
986 
987     dst.SetInt(vform, i, ur >> (64 - lane_size));
988   }
989   return dst;
990 }
991 
and_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)992 LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst,
993                                const LogicVRegister& src1,
994                                const LogicVRegister& src2) {
995   dst.ClearForWrite(vform);
996   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
997     dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
998   }
999   return dst;
1000 }
1001 
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1002 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
1003                               const LogicVRegister& src1,
1004                               const LogicVRegister& src2) {
1005   dst.ClearForWrite(vform);
1006   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1007     dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1008   }
1009   return dst;
1010 }
1011 
orn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1012 LogicVRegister Simulator::orn(VectorFormat vform, LogicVRegister dst,
1013                               const LogicVRegister& src1,
1014                               const LogicVRegister& src2) {
1015   dst.ClearForWrite(vform);
1016   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1017     dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1018   }
1019   return dst;
1020 }
1021 
eor(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1022 LogicVRegister Simulator::eor(VectorFormat vform, LogicVRegister dst,
1023                               const LogicVRegister& src1,
1024                               const LogicVRegister& src2) {
1025   dst.ClearForWrite(vform);
1026   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1027     dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1028   }
1029   return dst;
1030 }
1031 
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1032 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
1033                               const LogicVRegister& src1,
1034                               const LogicVRegister& src2) {
1035   dst.ClearForWrite(vform);
1036   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1037     dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1038   }
1039   return dst;
1040 }
1041 
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)1042 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
1043                               const LogicVRegister& src, uint64_t imm) {
1044   uint64_t result[16];
1045   int laneCount = LaneCountFromFormat(vform);
1046   for (int i = 0; i < laneCount; ++i) {
1047     result[i] = src.Uint(vform, i) & ~imm;
1048   }
1049   dst.SetUintArray(vform, result);
1050   return dst;
1051 }
1052 
bif(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1053 LogicVRegister Simulator::bif(VectorFormat vform, LogicVRegister dst,
1054                               const LogicVRegister& src1,
1055                               const LogicVRegister& src2) {
1056   dst.ClearForWrite(vform);
1057   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1058     uint64_t operand1 = dst.Uint(vform, i);
1059     uint64_t operand2 = ~src2.Uint(vform, i);
1060     uint64_t operand3 = src1.Uint(vform, i);
1061     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1062     dst.SetUint(vform, i, result);
1063   }
1064   return dst;
1065 }
1066 
bit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1067 LogicVRegister Simulator::bit(VectorFormat vform, LogicVRegister dst,
1068                               const LogicVRegister& src1,
1069                               const LogicVRegister& src2) {
1070   dst.ClearForWrite(vform);
1071   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1072     uint64_t operand1 = dst.Uint(vform, i);
1073     uint64_t operand2 = src2.Uint(vform, i);
1074     uint64_t operand3 = src1.Uint(vform, i);
1075     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1076     dst.SetUint(vform, i, result);
1077   }
1078   return dst;
1079 }
1080 
bsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1081 LogicVRegister Simulator::bsl(VectorFormat vform, LogicVRegister dst,
1082                               const LogicVRegister& src1,
1083                               const LogicVRegister& src2) {
1084   dst.ClearForWrite(vform);
1085   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1086     uint64_t operand1 = src2.Uint(vform, i);
1087     uint64_t operand2 = dst.Uint(vform, i);
1088     uint64_t operand3 = src1.Uint(vform, i);
1089     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1090     dst.SetUint(vform, i, result);
1091   }
1092   return dst;
1093 }
1094 
SMinMax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1095 LogicVRegister Simulator::SMinMax(VectorFormat vform, LogicVRegister dst,
1096                                   const LogicVRegister& src1,
1097                                   const LogicVRegister& src2, bool max) {
1098   dst.ClearForWrite(vform);
1099   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1100     int64_t src1_val = src1.Int(vform, i);
1101     int64_t src2_val = src2.Int(vform, i);
1102     int64_t dst_val;
1103     if (max) {
1104       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1105     } else {
1106       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1107     }
1108     dst.SetInt(vform, i, dst_val);
1109   }
1110   return dst;
1111 }
1112 
smax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1113 LogicVRegister Simulator::smax(VectorFormat vform, LogicVRegister dst,
1114                                const LogicVRegister& src1,
1115                                const LogicVRegister& src2) {
1116   return SMinMax(vform, dst, src1, src2, true);
1117 }
1118 
smin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1119 LogicVRegister Simulator::smin(VectorFormat vform, LogicVRegister dst,
1120                                const LogicVRegister& src1,
1121                                const LogicVRegister& src2) {
1122   return SMinMax(vform, dst, src1, src2, false);
1123 }
1124 
SMinMaxP(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1125 LogicVRegister Simulator::SMinMaxP(VectorFormat vform, LogicVRegister dst,
1126                                    const LogicVRegister& src1,
1127                                    const LogicVRegister& src2, bool max) {
1128   int lanes = LaneCountFromFormat(vform);
1129   int64_t result[kMaxLanesPerVector];
1130   const LogicVRegister* src = &src1;
1131   for (int j = 0; j < 2; j++) {
1132     for (int i = 0; i < lanes; i += 2) {
1133       int64_t first_val = src->Int(vform, i);
1134       int64_t second_val = src->Int(vform, i + 1);
1135       int64_t dst_val;
1136       if (max) {
1137         dst_val = (first_val > second_val) ? first_val : second_val;
1138       } else {
1139         dst_val = (first_val < second_val) ? first_val : second_val;
1140       }
1141       DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
1142       result[(i >> 1) + (j * lanes / 2)] = dst_val;
1143     }
1144     src = &src2;
1145   }
1146   dst.SetIntArray(vform, result);
1147   return dst;
1148 }
1149 
smaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1150 LogicVRegister Simulator::smaxp(VectorFormat vform, LogicVRegister dst,
1151                                 const LogicVRegister& src1,
1152                                 const LogicVRegister& src2) {
1153   return SMinMaxP(vform, dst, src1, src2, true);
1154 }
1155 
sminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1156 LogicVRegister Simulator::sminp(VectorFormat vform, LogicVRegister dst,
1157                                 const LogicVRegister& src1,
1158                                 const LogicVRegister& src2) {
1159   return SMinMaxP(vform, dst, src1, src2, false);
1160 }
1161 
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1162 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
1163                                const LogicVRegister& src) {
1164   DCHECK_EQ(vform, kFormatD);
1165 
1166   uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1167   dst.ClearForWrite(vform);
1168   dst.SetUint(vform, 0, dst_val);
1169   return dst;
1170 }
1171 
addv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1172 LogicVRegister Simulator::addv(VectorFormat vform, LogicVRegister dst,
1173                                const LogicVRegister& src) {
1174   VectorFormat vform_dst =
1175       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1176 
1177   int64_t dst_val = 0;
1178   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1179     dst_val += src.Int(vform, i);
1180   }
1181 
1182   dst.ClearForWrite(vform_dst);
1183   dst.SetInt(vform_dst, 0, dst_val);
1184   return dst;
1185 }
1186 
saddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1187 LogicVRegister Simulator::saddlv(VectorFormat vform, LogicVRegister dst,
1188                                  const LogicVRegister& src) {
1189   VectorFormat vform_dst =
1190       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1191 
1192   int64_t dst_val = 0;
1193   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1194     dst_val += src.Int(vform, i);
1195   }
1196 
1197   dst.ClearForWrite(vform_dst);
1198   dst.SetInt(vform_dst, 0, dst_val);
1199   return dst;
1200 }
1201 
uaddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1202 LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister dst,
1203                                  const LogicVRegister& src) {
1204   VectorFormat vform_dst =
1205       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1206 
1207   uint64_t dst_val = 0;
1208   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1209     dst_val += src.Uint(vform, i);
1210   }
1211 
1212   dst.ClearForWrite(vform_dst);
1213   dst.SetUint(vform_dst, 0, dst_val);
1214   return dst;
1215 }
1216 
SMinMaxV(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool max)1217 LogicVRegister Simulator::SMinMaxV(VectorFormat vform, LogicVRegister dst,
1218                                    const LogicVRegister& src, bool max) {
1219   int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1220   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1221     int64_t src_val = src.Int(vform, i);
1222     if (max) {
1223       dst_val = (src_val > dst_val) ? src_val : dst_val;
1224     } else {
1225       dst_val = (src_val < dst_val) ? src_val : dst_val;
1226     }
1227   }
1228   dst.ClearForWrite(ScalarFormatFromFormat(vform));
1229   dst.SetInt(vform, 0, dst_val);
1230   return dst;
1231 }
1232 
smaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1233 LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst,
1234                                 const LogicVRegister& src) {
1235   SMinMaxV(vform, dst, src, true);
1236   return dst;
1237 }
1238 
sminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1239 LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst,
1240                                 const LogicVRegister& src) {
1241   SMinMaxV(vform, dst, src, false);
1242   return dst;
1243 }
1244 
UMinMax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1245 LogicVRegister Simulator::UMinMax(VectorFormat vform, LogicVRegister dst,
1246                                   const LogicVRegister& src1,
1247                                   const LogicVRegister& src2, bool max) {
1248   dst.ClearForWrite(vform);
1249   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1250     uint64_t src1_val = src1.Uint(vform, i);
1251     uint64_t src2_val = src2.Uint(vform, i);
1252     uint64_t dst_val;
1253     if (max) {
1254       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1255     } else {
1256       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1257     }
1258     dst.SetUint(vform, i, dst_val);
1259   }
1260   return dst;
1261 }
1262 
umax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1263 LogicVRegister Simulator::umax(VectorFormat vform, LogicVRegister dst,
1264                                const LogicVRegister& src1,
1265                                const LogicVRegister& src2) {
1266   return UMinMax(vform, dst, src1, src2, true);
1267 }
1268 
umin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1269 LogicVRegister Simulator::umin(VectorFormat vform, LogicVRegister dst,
1270                                const LogicVRegister& src1,
1271                                const LogicVRegister& src2) {
1272   return UMinMax(vform, dst, src1, src2, false);
1273 }
1274 
UMinMaxP(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1275 LogicVRegister Simulator::UMinMaxP(VectorFormat vform, LogicVRegister dst,
1276                                    const LogicVRegister& src1,
1277                                    const LogicVRegister& src2, bool max) {
1278   int lanes = LaneCountFromFormat(vform);
1279   uint64_t result[kMaxLanesPerVector];
1280   const LogicVRegister* src = &src1;
1281   for (int j = 0; j < 2; j++) {
1282     for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1283       uint64_t first_val = src->Uint(vform, i);
1284       uint64_t second_val = src->Uint(vform, i + 1);
1285       uint64_t dst_val;
1286       if (max) {
1287         dst_val = (first_val > second_val) ? first_val : second_val;
1288       } else {
1289         dst_val = (first_val < second_val) ? first_val : second_val;
1290       }
1291       DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
1292       result[(i >> 1) + (j * lanes / 2)] = dst_val;
1293     }
1294     src = &src2;
1295   }
1296   dst.SetUintArray(vform, result);
1297   return dst;
1298 }
1299 
umaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1300 LogicVRegister Simulator::umaxp(VectorFormat vform, LogicVRegister dst,
1301                                 const LogicVRegister& src1,
1302                                 const LogicVRegister& src2) {
1303   return UMinMaxP(vform, dst, src1, src2, true);
1304 }
1305 
uminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1306 LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister dst,
1307                                 const LogicVRegister& src1,
1308                                 const LogicVRegister& src2) {
1309   return UMinMaxP(vform, dst, src1, src2, false);
1310 }
1311 
UMinMaxV(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool max)1312 LogicVRegister Simulator::UMinMaxV(VectorFormat vform, LogicVRegister dst,
1313                                    const LogicVRegister& src, bool max) {
1314   uint64_t dst_val = max ? 0 : UINT64_MAX;
1315   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1316     uint64_t src_val = src.Uint(vform, i);
1317     if (max) {
1318       dst_val = (src_val > dst_val) ? src_val : dst_val;
1319     } else {
1320       dst_val = (src_val < dst_val) ? src_val : dst_val;
1321     }
1322   }
1323   dst.ClearForWrite(ScalarFormatFromFormat(vform));
1324   dst.SetUint(vform, 0, dst_val);
1325   return dst;
1326 }
1327 
umaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1328 LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst,
1329                                 const LogicVRegister& src) {
1330   UMinMaxV(vform, dst, src, true);
1331   return dst;
1332 }
1333 
uminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1334 LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst,
1335                                 const LogicVRegister& src) {
1336   UMinMaxV(vform, dst, src, false);
1337   return dst;
1338 }
1339 
shl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1340 LogicVRegister Simulator::shl(VectorFormat vform, LogicVRegister dst,
1341                               const LogicVRegister& src, int shift) {
1342   DCHECK_GE(shift, 0);
1343   SimVRegister temp;
1344   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1345   return ushl(vform, dst, src, shiftreg);
1346 }
1347 
sshll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1348 LogicVRegister Simulator::sshll(VectorFormat vform, LogicVRegister dst,
1349                                 const LogicVRegister& src, int shift) {
1350   DCHECK_GE(shift, 0);
1351   SimVRegister temp1, temp2;
1352   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1353   LogicVRegister extendedreg = sxtl(vform, temp2, src);
1354   return sshl(vform, dst, extendedreg, shiftreg);
1355 }
1356 
sshll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1357 LogicVRegister Simulator::sshll2(VectorFormat vform, LogicVRegister dst,
1358                                  const LogicVRegister& src, int shift) {
1359   DCHECK_GE(shift, 0);
1360   SimVRegister temp1, temp2;
1361   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1362   LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1363   return sshl(vform, dst, extendedreg, shiftreg);
1364 }
1365 
shll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1366 LogicVRegister Simulator::shll(VectorFormat vform, LogicVRegister dst,
1367                                const LogicVRegister& src) {
1368   int shift = LaneSizeInBitsFromFormat(vform) / 2;
1369   return sshll(vform, dst, src, shift);
1370 }
1371 
shll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1372 LogicVRegister Simulator::shll2(VectorFormat vform, LogicVRegister dst,
1373                                 const LogicVRegister& src) {
1374   int shift = LaneSizeInBitsFromFormat(vform) / 2;
1375   return sshll2(vform, dst, src, shift);
1376 }
1377 
ushll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1378 LogicVRegister Simulator::ushll(VectorFormat vform, LogicVRegister dst,
1379                                 const LogicVRegister& src, int shift) {
1380   DCHECK_GE(shift, 0);
1381   SimVRegister temp1, temp2;
1382   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1383   LogicVRegister extendedreg = uxtl(vform, temp2, src);
1384   return ushl(vform, dst, extendedreg, shiftreg);
1385 }
1386 
ushll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1387 LogicVRegister Simulator::ushll2(VectorFormat vform, LogicVRegister dst,
1388                                  const LogicVRegister& src, int shift) {
1389   DCHECK_GE(shift, 0);
1390   SimVRegister temp1, temp2;
1391   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1392   LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1393   return ushl(vform, dst, extendedreg, shiftreg);
1394 }
1395 
sli(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1396 LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst,
1397                               const LogicVRegister& src, int shift) {
1398   dst.ClearForWrite(vform);
1399   int laneCount = LaneCountFromFormat(vform);
1400   for (int i = 0; i < laneCount; i++) {
1401     uint64_t src_lane = src.Uint(vform, i);
1402     uint64_t dst_lane = dst.Uint(vform, i);
1403     uint64_t shifted = src_lane << shift;
1404     uint64_t mask = MaxUintFromFormat(vform) << shift;
1405     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1406   }
1407   return dst;
1408 }
1409 
sqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1410 LogicVRegister Simulator::sqshl(VectorFormat vform, LogicVRegister dst,
1411                                 const LogicVRegister& src, int shift) {
1412   DCHECK_GE(shift, 0);
1413   SimVRegister temp;
1414   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1415   return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1416 }
1417 
uqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1418 LogicVRegister Simulator::uqshl(VectorFormat vform, LogicVRegister dst,
1419                                 const LogicVRegister& src, int shift) {
1420   DCHECK_GE(shift, 0);
1421   SimVRegister temp;
1422   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1423   return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1424 }
1425 
sqshlu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1426 LogicVRegister Simulator::sqshlu(VectorFormat vform, LogicVRegister dst,
1427                                  const LogicVRegister& src, int shift) {
1428   DCHECK_GE(shift, 0);
1429   SimVRegister temp;
1430   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1431   return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1432 }
1433 
sri(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1434 LogicVRegister Simulator::sri(VectorFormat vform, LogicVRegister dst,
1435                               const LogicVRegister& src, int shift) {
1436   dst.ClearForWrite(vform);
1437   int laneCount = LaneCountFromFormat(vform);
1438   DCHECK((shift > 0) &&
1439          (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1440   for (int i = 0; i < laneCount; i++) {
1441     uint64_t src_lane = src.Uint(vform, i);
1442     uint64_t dst_lane = dst.Uint(vform, i);
1443     uint64_t shifted;
1444     uint64_t mask;
1445     if (shift == 64) {
1446       shifted = 0;
1447       mask = 0;
1448     } else {
1449       shifted = src_lane >> shift;
1450       mask = MaxUintFromFormat(vform) >> shift;
1451     }
1452     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1453   }
1454   return dst;
1455 }
1456 
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1457 LogicVRegister Simulator::ushr(VectorFormat vform, LogicVRegister dst,
1458                                const LogicVRegister& src, int shift) {
1459   DCHECK_GE(shift, 0);
1460   SimVRegister temp;
1461   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1462   return ushl(vform, dst, src, shiftreg);
1463 }
1464 
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1465 LogicVRegister Simulator::sshr(VectorFormat vform, LogicVRegister dst,
1466                                const LogicVRegister& src, int shift) {
1467   DCHECK_GE(shift, 0);
1468   SimVRegister temp;
1469   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1470   return sshl(vform, dst, src, shiftreg);
1471 }
1472 
ssra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1473 LogicVRegister Simulator::ssra(VectorFormat vform, LogicVRegister dst,
1474                                const LogicVRegister& src, int shift) {
1475   SimVRegister temp;
1476   LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1477   return add(vform, dst, dst, shifted_reg);
1478 }
1479 
usra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1480 LogicVRegister Simulator::usra(VectorFormat vform, LogicVRegister dst,
1481                                const LogicVRegister& src, int shift) {
1482   SimVRegister temp;
1483   LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1484   return add(vform, dst, dst, shifted_reg);
1485 }
1486 
srsra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1487 LogicVRegister Simulator::srsra(VectorFormat vform, LogicVRegister dst,
1488                                 const LogicVRegister& src, int shift) {
1489   SimVRegister temp;
1490   LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1491   return add(vform, dst, dst, shifted_reg);
1492 }
1493 
ursra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1494 LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister dst,
1495                                 const LogicVRegister& src, int shift) {
1496   SimVRegister temp;
1497   LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1498   return add(vform, dst, dst, shifted_reg);
1499 }
1500 
cls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1501 LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst,
1502                               const LogicVRegister& src) {
1503   uint64_t result[16];
1504   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1505   int laneCount = LaneCountFromFormat(vform);
1506   for (int i = 0; i < laneCount; i++) {
1507     result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
1508   }
1509 
1510   dst.SetUintArray(vform, result);
1511   return dst;
1512 }
1513 
clz(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1514 LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst,
1515                               const LogicVRegister& src) {
1516   uint64_t result[16];
1517   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1518   int laneCount = LaneCountFromFormat(vform);
1519   for (int i = 0; i < laneCount; i++) {
1520     result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
1521   }
1522 
1523   dst.SetUintArray(vform, result);
1524   return dst;
1525 }
1526 
cnt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1527 LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst,
1528                               const LogicVRegister& src) {
1529   uint64_t result[16];
1530   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1531   int laneCount = LaneCountFromFormat(vform);
1532   for (int i = 0; i < laneCount; i++) {
1533     uint64_t value = src.Uint(vform, i);
1534     result[i] = 0;
1535     for (int j = 0; j < laneSizeInBits; j++) {
1536       result[i] += (value & 1);
1537       value >>= 1;
1538     }
1539   }
1540 
1541   dst.SetUintArray(vform, result);
1542   return dst;
1543 }
1544 
sshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1545 LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister dst,
1546                                const LogicVRegister& src1,
1547                                const LogicVRegister& src2) {
1548   dst.ClearForWrite(vform);
1549   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1550     int8_t shift_val = src2.Int(vform, i);
1551     int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1552 
1553     // Set signed saturation state.
1554     if ((shift_val > CountLeadingSignBits(lj_src_val, 64)) &&
1555         (lj_src_val != 0)) {
1556       dst.SetSignedSat(i, lj_src_val >= 0);
1557     }
1558 
1559     // Set unsigned saturation state.
1560     if (lj_src_val < 0) {
1561       dst.SetUnsignedSat(i, false);
1562     } else if ((shift_val > CountLeadingZeros(lj_src_val, 64)) &&
1563                (lj_src_val != 0)) {
1564       dst.SetUnsignedSat(i, true);
1565     }
1566 
1567     int64_t src_val = src1.Int(vform, i);
1568     bool src_is_negative = src_val < 0;
1569     if (shift_val > 63) {
1570       dst.SetInt(vform, i, 0);
1571     } else if (shift_val < -63) {
1572       dst.SetRounding(i, src_is_negative);
1573       dst.SetInt(vform, i, src_is_negative ? -1 : 0);
1574     } else {
1575       // Use unsigned types for shifts, as behaviour is undefined for signed
1576       // lhs.
1577       uint64_t usrc_val = static_cast<uint64_t>(src_val);
1578 
1579       if (shift_val < 0) {
1580         // Convert to right shift.
1581         shift_val = -shift_val;
1582 
1583         // Set rounding state by testing most-significant bit shifted out.
1584         // Rounding only needed on right shifts.
1585         if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
1586           dst.SetRounding(i, true);
1587         }
1588 
1589         usrc_val >>= shift_val;
1590 
1591         if (src_is_negative) {
1592           // Simulate sign-extension.
1593           usrc_val |= (~UINT64_C(0) << (64 - shift_val));
1594         }
1595       } else {
1596         usrc_val <<= shift_val;
1597       }
1598       dst.SetUint(vform, i, usrc_val);
1599     }
1600   }
1601   return dst;
1602 }
1603 
ushl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1604 LogicVRegister Simulator::ushl(VectorFormat vform, LogicVRegister dst,
1605                                const LogicVRegister& src1,
1606                                const LogicVRegister& src2) {
1607   dst.ClearForWrite(vform);
1608   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1609     int8_t shift_val = src2.Int(vform, i);
1610     uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1611 
1612     // Set saturation state.
1613     if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && (lj_src_val != 0)) {
1614       dst.SetUnsignedSat(i, true);
1615     }
1616 
1617     uint64_t src_val = src1.Uint(vform, i);
1618     if ((shift_val > 63) || (shift_val < -64)) {
1619       dst.SetUint(vform, i, 0);
1620     } else {
1621       if (shift_val < 0) {
1622         // Set rounding state. Rounding only needed on right shifts.
1623         if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1624           dst.SetRounding(i, true);
1625         }
1626 
1627         if (shift_val == -64) {
1628           src_val = 0;
1629         } else {
1630           src_val >>= -shift_val;
1631         }
1632       } else {
1633         src_val <<= shift_val;
1634       }
1635       dst.SetUint(vform, i, src_val);
1636     }
1637   }
1638   return dst;
1639 }
1640 
neg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1641 LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister dst,
1642                               const LogicVRegister& src) {
1643   dst.ClearForWrite(vform);
1644   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1645     // Test for signed saturation.
1646     int64_t sa = src.Int(vform, i);
1647     if (sa == MinIntFromFormat(vform)) {
1648       dst.SetSignedSat(i, true);
1649     }
1650     dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1651   }
1652   return dst;
1653 }
1654 
suqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1655 LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister dst,
1656                                  const LogicVRegister& src) {
1657   dst.ClearForWrite(vform);
1658   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1659     int64_t sa = dst.IntLeftJustified(vform, i);
1660     uint64_t ub = src.UintLeftJustified(vform, i);
1661     uint64_t ur = sa + ub;
1662 
1663     int64_t sr = bit_cast<int64_t>(ur);
1664     if (sr < sa) {  // Test for signed positive saturation.
1665       dst.SetInt(vform, i, MaxIntFromFormat(vform));
1666     } else {
1667       dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));
1668     }
1669   }
1670   return dst;
1671 }
1672 
usqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1673 LogicVRegister Simulator::usqadd(VectorFormat vform, LogicVRegister dst,
1674                                  const LogicVRegister& src) {
1675   dst.ClearForWrite(vform);
1676   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1677     uint64_t ua = dst.UintLeftJustified(vform, i);
1678     int64_t sb = src.IntLeftJustified(vform, i);
1679     uint64_t ur = ua + sb;
1680 
1681     if ((sb > 0) && (ur <= ua)) {
1682       dst.SetUint(vform, i, MaxUintFromFormat(vform));  // Positive saturation.
1683     } else if ((sb < 0) && (ur >= ua)) {
1684       dst.SetUint(vform, i, 0);  // Negative saturation.
1685     } else {
1686       dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
1687     }
1688   }
1689   return dst;
1690 }
1691 
abs(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1692 LogicVRegister Simulator::abs(VectorFormat vform, LogicVRegister dst,
1693                               const LogicVRegister& src) {
1694   dst.ClearForWrite(vform);
1695   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1696     // Test for signed saturation.
1697     int64_t sa = src.Int(vform, i);
1698     if (sa == MinIntFromFormat(vform)) {
1699       dst.SetSignedSat(i, true);
1700     }
1701     if (sa < 0) {
1702       dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1703     } else {
1704       dst.SetInt(vform, i, sa);
1705     }
1706   }
1707   return dst;
1708 }
1709 
ExtractNarrow(VectorFormat dstform,LogicVRegister dst,bool dstIsSigned,const LogicVRegister & src,bool srcIsSigned)1710 LogicVRegister Simulator::ExtractNarrow(VectorFormat dstform,
1711                                         LogicVRegister dst, bool dstIsSigned,
1712                                         const LogicVRegister& src,
1713                                         bool srcIsSigned) {
1714   bool upperhalf = false;
1715   VectorFormat srcform = kFormatUndefined;
1716   int64_t ssrc[8];
1717   uint64_t usrc[8];
1718 
1719   switch (dstform) {
1720     case kFormat8B:
1721       upperhalf = false;
1722       srcform = kFormat8H;
1723       break;
1724     case kFormat16B:
1725       upperhalf = true;
1726       srcform = kFormat8H;
1727       break;
1728     case kFormat4H:
1729       upperhalf = false;
1730       srcform = kFormat4S;
1731       break;
1732     case kFormat8H:
1733       upperhalf = true;
1734       srcform = kFormat4S;
1735       break;
1736     case kFormat2S:
1737       upperhalf = false;
1738       srcform = kFormat2D;
1739       break;
1740     case kFormat4S:
1741       upperhalf = true;
1742       srcform = kFormat2D;
1743       break;
1744     case kFormatB:
1745       upperhalf = false;
1746       srcform = kFormatH;
1747       break;
1748     case kFormatH:
1749       upperhalf = false;
1750       srcform = kFormatS;
1751       break;
1752     case kFormatS:
1753       upperhalf = false;
1754       srcform = kFormatD;
1755       break;
1756     default:
1757       UNIMPLEMENTED();
1758   }
1759 
1760   for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
1761     ssrc[i] = src.Int(srcform, i);
1762     usrc[i] = src.Uint(srcform, i);
1763   }
1764 
1765   int offset;
1766   if (upperhalf) {
1767     offset = LaneCountFromFormat(dstform) / 2;
1768   } else {
1769     offset = 0;
1770     dst.ClearForWrite(dstform);
1771   }
1772 
1773   for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
1774     // Test for signed saturation
1775     if (ssrc[i] > MaxIntFromFormat(dstform)) {
1776       dst.SetSignedSat(offset + i, true);
1777     } else if (ssrc[i] < MinIntFromFormat(dstform)) {
1778       dst.SetSignedSat(offset + i, false);
1779     }
1780 
1781     // Test for unsigned saturation
1782     if (srcIsSigned) {
1783       if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
1784         dst.SetUnsignedSat(offset + i, true);
1785       } else if (ssrc[i] < 0) {
1786         dst.SetUnsignedSat(offset + i, false);
1787       }
1788     } else {
1789       if (usrc[i] > MaxUintFromFormat(dstform)) {
1790         dst.SetUnsignedSat(offset + i, true);
1791       }
1792     }
1793 
1794     int64_t result;
1795     if (srcIsSigned) {
1796       result = ssrc[i] & MaxUintFromFormat(dstform);
1797     } else {
1798       result = usrc[i] & MaxUintFromFormat(dstform);
1799     }
1800 
1801     if (dstIsSigned) {
1802       dst.SetInt(dstform, offset + i, result);
1803     } else {
1804       dst.SetUint(dstform, offset + i, result);
1805     }
1806   }
1807   return dst;
1808 }
1809 
xtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1810 LogicVRegister Simulator::xtn(VectorFormat vform, LogicVRegister dst,
1811                               const LogicVRegister& src) {
1812   return ExtractNarrow(vform, dst, true, src, true);
1813 }
1814 
sqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1815 LogicVRegister Simulator::sqxtn(VectorFormat vform, LogicVRegister dst,
1816                                 const LogicVRegister& src) {
1817   return ExtractNarrow(vform, dst, true, src, true).SignedSaturate(vform);
1818 }
1819 
sqxtun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1820 LogicVRegister Simulator::sqxtun(VectorFormat vform, LogicVRegister dst,
1821                                  const LogicVRegister& src) {
1822   return ExtractNarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
1823 }
1824 
uqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1825 LogicVRegister Simulator::uqxtn(VectorFormat vform, LogicVRegister dst,
1826                                 const LogicVRegister& src) {
1827   return ExtractNarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
1828 }
1829 
AbsDiff(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool issigned)1830 LogicVRegister Simulator::AbsDiff(VectorFormat vform, LogicVRegister dst,
1831                                   const LogicVRegister& src1,
1832                                   const LogicVRegister& src2, bool issigned) {
1833   dst.ClearForWrite(vform);
1834   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1835     if (issigned) {
1836       int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
1837       sr = sr > 0 ? sr : -sr;
1838       dst.SetInt(vform, i, sr);
1839     } else {
1840       int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
1841       sr = sr > 0 ? sr : -sr;
1842       dst.SetUint(vform, i, sr);
1843     }
1844   }
1845   return dst;
1846 }
1847 
saba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1848 LogicVRegister Simulator::saba(VectorFormat vform, LogicVRegister dst,
1849                                const LogicVRegister& src1,
1850                                const LogicVRegister& src2) {
1851   SimVRegister temp;
1852   dst.ClearForWrite(vform);
1853   AbsDiff(vform, temp, src1, src2, true);
1854   add(vform, dst, dst, temp);
1855   return dst;
1856 }
1857 
uaba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1858 LogicVRegister Simulator::uaba(VectorFormat vform, LogicVRegister dst,
1859                                const LogicVRegister& src1,
1860                                const LogicVRegister& src2) {
1861   SimVRegister temp;
1862   dst.ClearForWrite(vform);
1863   AbsDiff(vform, temp, src1, src2, false);
1864   add(vform, dst, dst, temp);
1865   return dst;
1866 }
1867 
not_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1868 LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister dst,
1869                                const LogicVRegister& src) {
1870   dst.ClearForWrite(vform);
1871   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1872     dst.SetUint(vform, i, ~src.Uint(vform, i));
1873   }
1874   return dst;
1875 }
1876 
rbit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1877 LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst,
1878                                const LogicVRegister& src) {
1879   uint64_t result[16];
1880   int laneCount = LaneCountFromFormat(vform);
1881   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1882   uint64_t reversed_value;
1883   uint64_t value;
1884   for (int i = 0; i < laneCount; i++) {
1885     value = src.Uint(vform, i);
1886     reversed_value = 0;
1887     for (int j = 0; j < laneSizeInBits; j++) {
1888       reversed_value = (reversed_value << 1) | (value & 1);
1889       value >>= 1;
1890     }
1891     result[i] = reversed_value;
1892   }
1893 
1894   dst.SetUintArray(vform, result);
1895   return dst;
1896 }
1897 
rev(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int revSize)1898 LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst,
1899                               const LogicVRegister& src, int revSize) {
1900   uint64_t result[16];
1901   int laneCount = LaneCountFromFormat(vform);
1902   int laneSize = LaneSizeInBytesFromFormat(vform);
1903   int lanesPerLoop = revSize / laneSize;
1904   for (int i = 0; i < laneCount; i += lanesPerLoop) {
1905     for (int j = 0; j < lanesPerLoop; j++) {
1906       result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
1907     }
1908   }
1909   dst.SetUintArray(vform, result);
1910   return dst;
1911 }
1912 
rev16(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1913 LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst,
1914                                 const LogicVRegister& src) {
1915   return rev(vform, dst, src, 2);
1916 }
1917 
rev32(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1918 LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst,
1919                                 const LogicVRegister& src) {
1920   return rev(vform, dst, src, 4);
1921 }
1922 
rev64(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1923 LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst,
1924                                 const LogicVRegister& src) {
1925   return rev(vform, dst, src, 8);
1926 }
1927 
addlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_signed,bool do_accumulate)1928 LogicVRegister Simulator::addlp(VectorFormat vform, LogicVRegister dst,
1929                                 const LogicVRegister& src, bool is_signed,
1930                                 bool do_accumulate) {
1931   VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
1932   DCHECK_LE(LaneSizeInBitsFromFormat(vformsrc), 32U);
1933   DCHECK_LE(LaneCountFromFormat(vform), 8);
1934 
1935   uint64_t result[8];
1936   int lane_count = LaneCountFromFormat(vform);
1937   for (int i = 0; i < lane_count; i++) {
1938     if (is_signed) {
1939       result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
1940                                         src.Int(vformsrc, 2 * i + 1));
1941     } else {
1942       result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
1943     }
1944   }
1945 
1946   dst.ClearForWrite(vform);
1947   for (int i = 0; i < lane_count; ++i) {
1948     if (do_accumulate) {
1949       result[i] += dst.Uint(vform, i);
1950     }
1951     dst.SetUint(vform, i, result[i]);
1952   }
1953 
1954   return dst;
1955 }
1956 
saddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1957 LogicVRegister Simulator::saddlp(VectorFormat vform, LogicVRegister dst,
1958                                  const LogicVRegister& src) {
1959   return addlp(vform, dst, src, true, false);
1960 }
1961 
uaddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1962 LogicVRegister Simulator::uaddlp(VectorFormat vform, LogicVRegister dst,
1963                                  const LogicVRegister& src) {
1964   return addlp(vform, dst, src, false, false);
1965 }
1966 
sadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1967 LogicVRegister Simulator::sadalp(VectorFormat vform, LogicVRegister dst,
1968                                  const LogicVRegister& src) {
1969   return addlp(vform, dst, src, true, true);
1970 }
1971 
uadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1972 LogicVRegister Simulator::uadalp(VectorFormat vform, LogicVRegister dst,
1973                                  const LogicVRegister& src) {
1974   return addlp(vform, dst, src, false, true);
1975 }
1976 
ext(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1977 LogicVRegister Simulator::ext(VectorFormat vform, LogicVRegister dst,
1978                               const LogicVRegister& src1,
1979                               const LogicVRegister& src2, int index) {
1980   uint8_t result[16];
1981   int laneCount = LaneCountFromFormat(vform);
1982   for (int i = 0; i < laneCount - index; ++i) {
1983     result[i] = src1.Uint(vform, i + index);
1984   }
1985   for (int i = 0; i < index; ++i) {
1986     result[laneCount - index + i] = src2.Uint(vform, i);
1987   }
1988   dst.ClearForWrite(vform);
1989   for (int i = 0; i < laneCount; ++i) {
1990     dst.SetUint(vform, i, result[i]);
1991   }
1992   return dst;
1993 }
1994 
dup_element(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)1995 LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst,
1996                                       const LogicVRegister& src,
1997                                       int src_index) {
1998   int laneCount = LaneCountFromFormat(vform);
1999   uint64_t value = src.Uint(vform, src_index);
2000   dst.ClearForWrite(vform);
2001   for (int i = 0; i < laneCount; ++i) {
2002     dst.SetUint(vform, i, value);
2003   }
2004   return dst;
2005 }
2006 
dup_immediate(VectorFormat vform,LogicVRegister dst,uint64_t imm)2007 LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst,
2008                                         uint64_t imm) {
2009   int laneCount = LaneCountFromFormat(vform);
2010   uint64_t value = imm & MaxUintFromFormat(vform);
2011   dst.ClearForWrite(vform);
2012   for (int i = 0; i < laneCount; ++i) {
2013     dst.SetUint(vform, i, value);
2014   }
2015   return dst;
2016 }
2017 
ins_element(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,int src_index)2018 LogicVRegister Simulator::ins_element(VectorFormat vform, LogicVRegister dst,
2019                                       int dst_index, const LogicVRegister& src,
2020                                       int src_index) {
2021   dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2022   return dst;
2023 }
2024 
ins_immediate(VectorFormat vform,LogicVRegister dst,int dst_index,uint64_t imm)2025 LogicVRegister Simulator::ins_immediate(VectorFormat vform, LogicVRegister dst,
2026                                         int dst_index, uint64_t imm) {
2027   uint64_t value = imm & MaxUintFromFormat(vform);
2028   dst.SetUint(vform, dst_index, value);
2029   return dst;
2030 }
2031 
movi(VectorFormat vform,LogicVRegister dst,uint64_t imm)2032 LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst,
2033                                uint64_t imm) {
2034   int laneCount = LaneCountFromFormat(vform);
2035   dst.ClearForWrite(vform);
2036   for (int i = 0; i < laneCount; ++i) {
2037     dst.SetUint(vform, i, imm);
2038   }
2039   return dst;
2040 }
2041 
mvni(VectorFormat vform,LogicVRegister dst,uint64_t imm)2042 LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst,
2043                                uint64_t imm) {
2044   int laneCount = LaneCountFromFormat(vform);
2045   dst.ClearForWrite(vform);
2046   for (int i = 0; i < laneCount; ++i) {
2047     dst.SetUint(vform, i, ~imm);
2048   }
2049   return dst;
2050 }
2051 
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)2052 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
2053                               const LogicVRegister& src, uint64_t imm) {
2054   uint64_t result[16];
2055   int laneCount = LaneCountFromFormat(vform);
2056   for (int i = 0; i < laneCount; ++i) {
2057     result[i] = src.Uint(vform, i) | imm;
2058   }
2059   dst.SetUintArray(vform, result);
2060   return dst;
2061 }
2062 
uxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2063 LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister dst,
2064                                const LogicVRegister& src) {
2065   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2066 
2067   dst.ClearForWrite(vform);
2068   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2069     dst.SetUint(vform, i, src.Uint(vform_half, i));
2070   }
2071   return dst;
2072 }
2073 
sxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2074 LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister dst,
2075                                const LogicVRegister& src) {
2076   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2077 
2078   dst.ClearForWrite(vform);
2079   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2080     dst.SetInt(vform, i, src.Int(vform_half, i));
2081   }
2082   return dst;
2083 }
2084 
uxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2085 LogicVRegister Simulator::uxtl2(VectorFormat vform, LogicVRegister dst,
2086                                 const LogicVRegister& src) {
2087   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2088   int lane_count = LaneCountFromFormat(vform);
2089 
2090   dst.ClearForWrite(vform);
2091   for (int i = 0; i < lane_count; i++) {
2092     dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
2093   }
2094   return dst;
2095 }
2096 
sxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2097 LogicVRegister Simulator::sxtl2(VectorFormat vform, LogicVRegister dst,
2098                                 const LogicVRegister& src) {
2099   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2100   int lane_count = LaneCountFromFormat(vform);
2101 
2102   dst.ClearForWrite(vform);
2103   for (int i = 0; i < lane_count; i++) {
2104     dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
2105   }
2106   return dst;
2107 }
2108 
shrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2109 LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst,
2110                                const LogicVRegister& src, int shift) {
2111   SimVRegister temp;
2112   VectorFormat vform_src = VectorFormatDoubleWidth(vform);
2113   VectorFormat vform_dst = vform;
2114   LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
2115   return ExtractNarrow(vform_dst, dst, false, shifted_src, false);
2116 }
2117 
shrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2118 LogicVRegister Simulator::shrn2(VectorFormat vform, LogicVRegister dst,
2119                                 const LogicVRegister& src, int shift) {
2120   SimVRegister temp;
2121   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2122   VectorFormat vformdst = vform;
2123   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
2124   return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2125 }
2126 
rshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2127 LogicVRegister Simulator::rshrn(VectorFormat vform, LogicVRegister dst,
2128                                 const LogicVRegister& src, int shift) {
2129   SimVRegister temp;
2130   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2131   VectorFormat vformdst = vform;
2132   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2133   return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2134 }
2135 
rshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2136 LogicVRegister Simulator::rshrn2(VectorFormat vform, LogicVRegister dst,
2137                                  const LogicVRegister& src, int shift) {
2138   SimVRegister temp;
2139   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2140   VectorFormat vformdst = vform;
2141   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2142   return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2143 }
2144 
Table(VectorFormat vform,LogicVRegister dst,const LogicVRegister & ind,bool zero_out_of_bounds,const LogicVRegister * tab1,const LogicVRegister * tab2,const LogicVRegister * tab3,const LogicVRegister * tab4)2145 LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst,
2146                                 const LogicVRegister& ind,
2147                                 bool zero_out_of_bounds,
2148                                 const LogicVRegister* tab1,
2149                                 const LogicVRegister* tab2,
2150                                 const LogicVRegister* tab3,
2151                                 const LogicVRegister* tab4) {
2152   DCHECK_NOT_NULL(tab1);
2153   const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
2154   uint64_t result[kMaxLanesPerVector];
2155   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2156     result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
2157   }
2158   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2159     uint64_t j = ind.Uint(vform, i);
2160     int tab_idx = static_cast<int>(j >> 4);
2161     int j_idx = static_cast<int>(j & 15);
2162     if ((tab_idx < 4) && (tab[tab_idx] != nullptr)) {
2163       result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
2164     }
2165   }
2166   dst.SetUintArray(vform, result);
2167   return dst;
2168 }
2169 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)2170 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2171                               const LogicVRegister& tab,
2172                               const LogicVRegister& ind) {
2173   return Table(vform, dst, ind, true, &tab);
2174 }
2175 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)2176 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2177                               const LogicVRegister& tab,
2178                               const LogicVRegister& tab2,
2179                               const LogicVRegister& ind) {
2180   return Table(vform, dst, ind, true, &tab, &tab2);
2181 }
2182 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)2183 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2184                               const LogicVRegister& tab,
2185                               const LogicVRegister& tab2,
2186                               const LogicVRegister& tab3,
2187                               const LogicVRegister& ind) {
2188   return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
2189 }
2190 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)2191 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2192                               const LogicVRegister& tab,
2193                               const LogicVRegister& tab2,
2194                               const LogicVRegister& tab3,
2195                               const LogicVRegister& tab4,
2196                               const LogicVRegister& ind) {
2197   return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
2198 }
2199 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)2200 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2201                               const LogicVRegister& tab,
2202                               const LogicVRegister& ind) {
2203   return Table(vform, dst, ind, false, &tab);
2204 }
2205 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)2206 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2207                               const LogicVRegister& tab,
2208                               const LogicVRegister& tab2,
2209                               const LogicVRegister& ind) {
2210   return Table(vform, dst, ind, false, &tab, &tab2);
2211 }
2212 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)2213 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2214                               const LogicVRegister& tab,
2215                               const LogicVRegister& tab2,
2216                               const LogicVRegister& tab3,
2217                               const LogicVRegister& ind) {
2218   return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
2219 }
2220 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)2221 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2222                               const LogicVRegister& tab,
2223                               const LogicVRegister& tab2,
2224                               const LogicVRegister& tab3,
2225                               const LogicVRegister& tab4,
2226                               const LogicVRegister& ind) {
2227   return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
2228 }
2229 
uqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2230 LogicVRegister Simulator::uqshrn(VectorFormat vform, LogicVRegister dst,
2231                                  const LogicVRegister& src, int shift) {
2232   return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
2233 }
2234 
uqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2235 LogicVRegister Simulator::uqshrn2(VectorFormat vform, LogicVRegister dst,
2236                                   const LogicVRegister& src, int shift) {
2237   return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2238 }
2239 
uqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2240 LogicVRegister Simulator::uqrshrn(VectorFormat vform, LogicVRegister dst,
2241                                   const LogicVRegister& src, int shift) {
2242   return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
2243 }
2244 
uqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2245 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, LogicVRegister dst,
2246                                    const LogicVRegister& src, int shift) {
2247   return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2248 }
2249 
sqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2250 LogicVRegister Simulator::sqshrn(VectorFormat vform, LogicVRegister dst,
2251                                  const LogicVRegister& src, int shift) {
2252   SimVRegister temp;
2253   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2254   VectorFormat vformdst = vform;
2255   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2256   return sqxtn(vformdst, dst, shifted_src);
2257 }
2258 
sqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2259 LogicVRegister Simulator::sqshrn2(VectorFormat vform, LogicVRegister dst,
2260                                   const LogicVRegister& src, int shift) {
2261   SimVRegister temp;
2262   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2263   VectorFormat vformdst = vform;
2264   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2265   return sqxtn(vformdst, dst, shifted_src);
2266 }
2267 
sqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2268 LogicVRegister Simulator::sqrshrn(VectorFormat vform, LogicVRegister dst,
2269                                   const LogicVRegister& src, int shift) {
2270   SimVRegister temp;
2271   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2272   VectorFormat vformdst = vform;
2273   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2274   return sqxtn(vformdst, dst, shifted_src);
2275 }
2276 
sqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2277 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, LogicVRegister dst,
2278                                    const LogicVRegister& src, int shift) {
2279   SimVRegister temp;
2280   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2281   VectorFormat vformdst = vform;
2282   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2283   return sqxtn(vformdst, dst, shifted_src);
2284 }
2285 
sqshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2286 LogicVRegister Simulator::sqshrun(VectorFormat vform, LogicVRegister dst,
2287                                   const LogicVRegister& src, int shift) {
2288   SimVRegister temp;
2289   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2290   VectorFormat vformdst = vform;
2291   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2292   return sqxtun(vformdst, dst, shifted_src);
2293 }
2294 
sqshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2295 LogicVRegister Simulator::sqshrun2(VectorFormat vform, LogicVRegister dst,
2296                                    const LogicVRegister& src, int shift) {
2297   SimVRegister temp;
2298   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2299   VectorFormat vformdst = vform;
2300   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2301   return sqxtun(vformdst, dst, shifted_src);
2302 }
2303 
sqrshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2304 LogicVRegister Simulator::sqrshrun(VectorFormat vform, LogicVRegister dst,
2305                                    const LogicVRegister& src, int shift) {
2306   SimVRegister temp;
2307   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2308   VectorFormat vformdst = vform;
2309   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2310   return sqxtun(vformdst, dst, shifted_src);
2311 }
2312 
sqrshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2313 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, LogicVRegister dst,
2314                                     const LogicVRegister& src, int shift) {
2315   SimVRegister temp;
2316   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2317   VectorFormat vformdst = vform;
2318   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2319   return sqxtun(vformdst, dst, shifted_src);
2320 }
2321 
uaddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2322 LogicVRegister Simulator::uaddl(VectorFormat vform, LogicVRegister dst,
2323                                 const LogicVRegister& src1,
2324                                 const LogicVRegister& src2) {
2325   SimVRegister temp1, temp2;
2326   uxtl(vform, temp1, src1);
2327   uxtl(vform, temp2, src2);
2328   add(vform, dst, temp1, temp2);
2329   return dst;
2330 }
2331 
uaddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2332 LogicVRegister Simulator::uaddl2(VectorFormat vform, LogicVRegister dst,
2333                                  const LogicVRegister& src1,
2334                                  const LogicVRegister& src2) {
2335   SimVRegister temp1, temp2;
2336   uxtl2(vform, temp1, src1);
2337   uxtl2(vform, temp2, src2);
2338   add(vform, dst, temp1, temp2);
2339   return dst;
2340 }
2341 
uaddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2342 LogicVRegister Simulator::uaddw(VectorFormat vform, LogicVRegister dst,
2343                                 const LogicVRegister& src1,
2344                                 const LogicVRegister& src2) {
2345   SimVRegister temp;
2346   uxtl(vform, temp, src2);
2347   add(vform, dst, src1, temp);
2348   return dst;
2349 }
2350 
uaddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2351 LogicVRegister Simulator::uaddw2(VectorFormat vform, LogicVRegister dst,
2352                                  const LogicVRegister& src1,
2353                                  const LogicVRegister& src2) {
2354   SimVRegister temp;
2355   uxtl2(vform, temp, src2);
2356   add(vform, dst, src1, temp);
2357   return dst;
2358 }
2359 
saddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2360 LogicVRegister Simulator::saddl(VectorFormat vform, LogicVRegister dst,
2361                                 const LogicVRegister& src1,
2362                                 const LogicVRegister& src2) {
2363   SimVRegister temp1, temp2;
2364   sxtl(vform, temp1, src1);
2365   sxtl(vform, temp2, src2);
2366   add(vform, dst, temp1, temp2);
2367   return dst;
2368 }
2369 
saddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2370 LogicVRegister Simulator::saddl2(VectorFormat vform, LogicVRegister dst,
2371                                  const LogicVRegister& src1,
2372                                  const LogicVRegister& src2) {
2373   SimVRegister temp1, temp2;
2374   sxtl2(vform, temp1, src1);
2375   sxtl2(vform, temp2, src2);
2376   add(vform, dst, temp1, temp2);
2377   return dst;
2378 }
2379 
saddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2380 LogicVRegister Simulator::saddw(VectorFormat vform, LogicVRegister dst,
2381                                 const LogicVRegister& src1,
2382                                 const LogicVRegister& src2) {
2383   SimVRegister temp;
2384   sxtl(vform, temp, src2);
2385   add(vform, dst, src1, temp);
2386   return dst;
2387 }
2388 
saddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2389 LogicVRegister Simulator::saddw2(VectorFormat vform, LogicVRegister dst,
2390                                  const LogicVRegister& src1,
2391                                  const LogicVRegister& src2) {
2392   SimVRegister temp;
2393   sxtl2(vform, temp, src2);
2394   add(vform, dst, src1, temp);
2395   return dst;
2396 }
2397 
usubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2398 LogicVRegister Simulator::usubl(VectorFormat vform, LogicVRegister dst,
2399                                 const LogicVRegister& src1,
2400                                 const LogicVRegister& src2) {
2401   SimVRegister temp1, temp2;
2402   uxtl(vform, temp1, src1);
2403   uxtl(vform, temp2, src2);
2404   sub(vform, dst, temp1, temp2);
2405   return dst;
2406 }
2407 
usubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2408 LogicVRegister Simulator::usubl2(VectorFormat vform, LogicVRegister dst,
2409                                  const LogicVRegister& src1,
2410                                  const LogicVRegister& src2) {
2411   SimVRegister temp1, temp2;
2412   uxtl2(vform, temp1, src1);
2413   uxtl2(vform, temp2, src2);
2414   sub(vform, dst, temp1, temp2);
2415   return dst;
2416 }
2417 
usubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2418 LogicVRegister Simulator::usubw(VectorFormat vform, LogicVRegister dst,
2419                                 const LogicVRegister& src1,
2420                                 const LogicVRegister& src2) {
2421   SimVRegister temp;
2422   uxtl(vform, temp, src2);
2423   sub(vform, dst, src1, temp);
2424   return dst;
2425 }
2426 
usubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2427 LogicVRegister Simulator::usubw2(VectorFormat vform, LogicVRegister dst,
2428                                  const LogicVRegister& src1,
2429                                  const LogicVRegister& src2) {
2430   SimVRegister temp;
2431   uxtl2(vform, temp, src2);
2432   sub(vform, dst, src1, temp);
2433   return dst;
2434 }
2435 
ssubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2436 LogicVRegister Simulator::ssubl(VectorFormat vform, LogicVRegister dst,
2437                                 const LogicVRegister& src1,
2438                                 const LogicVRegister& src2) {
2439   SimVRegister temp1, temp2;
2440   sxtl(vform, temp1, src1);
2441   sxtl(vform, temp2, src2);
2442   sub(vform, dst, temp1, temp2);
2443   return dst;
2444 }
2445 
ssubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2446 LogicVRegister Simulator::ssubl2(VectorFormat vform, LogicVRegister dst,
2447                                  const LogicVRegister& src1,
2448                                  const LogicVRegister& src2) {
2449   SimVRegister temp1, temp2;
2450   sxtl2(vform, temp1, src1);
2451   sxtl2(vform, temp2, src2);
2452   sub(vform, dst, temp1, temp2);
2453   return dst;
2454 }
2455 
ssubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2456 LogicVRegister Simulator::ssubw(VectorFormat vform, LogicVRegister dst,
2457                                 const LogicVRegister& src1,
2458                                 const LogicVRegister& src2) {
2459   SimVRegister temp;
2460   sxtl(vform, temp, src2);
2461   sub(vform, dst, src1, temp);
2462   return dst;
2463 }
2464 
ssubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2465 LogicVRegister Simulator::ssubw2(VectorFormat vform, LogicVRegister dst,
2466                                  const LogicVRegister& src1,
2467                                  const LogicVRegister& src2) {
2468   SimVRegister temp;
2469   sxtl2(vform, temp, src2);
2470   sub(vform, dst, src1, temp);
2471   return dst;
2472 }
2473 
uabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2474 LogicVRegister Simulator::uabal(VectorFormat vform, LogicVRegister dst,
2475                                 const LogicVRegister& src1,
2476                                 const LogicVRegister& src2) {
2477   SimVRegister temp1, temp2;
2478   uxtl(vform, temp1, src1);
2479   uxtl(vform, temp2, src2);
2480   uaba(vform, dst, temp1, temp2);
2481   return dst;
2482 }
2483 
uabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2484 LogicVRegister Simulator::uabal2(VectorFormat vform, LogicVRegister dst,
2485                                  const LogicVRegister& src1,
2486                                  const LogicVRegister& src2) {
2487   SimVRegister temp1, temp2;
2488   uxtl2(vform, temp1, src1);
2489   uxtl2(vform, temp2, src2);
2490   uaba(vform, dst, temp1, temp2);
2491   return dst;
2492 }
2493 
sabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2494 LogicVRegister Simulator::sabal(VectorFormat vform, LogicVRegister dst,
2495                                 const LogicVRegister& src1,
2496                                 const LogicVRegister& src2) {
2497   SimVRegister temp1, temp2;
2498   sxtl(vform, temp1, src1);
2499   sxtl(vform, temp2, src2);
2500   saba(vform, dst, temp1, temp2);
2501   return dst;
2502 }
2503 
sabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2504 LogicVRegister Simulator::sabal2(VectorFormat vform, LogicVRegister dst,
2505                                  const LogicVRegister& src1,
2506                                  const LogicVRegister& src2) {
2507   SimVRegister temp1, temp2;
2508   sxtl2(vform, temp1, src1);
2509   sxtl2(vform, temp2, src2);
2510   saba(vform, dst, temp1, temp2);
2511   return dst;
2512 }
2513 
uabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2514 LogicVRegister Simulator::uabdl(VectorFormat vform, LogicVRegister dst,
2515                                 const LogicVRegister& src1,
2516                                 const LogicVRegister& src2) {
2517   SimVRegister temp1, temp2;
2518   uxtl(vform, temp1, src1);
2519   uxtl(vform, temp2, src2);
2520   AbsDiff(vform, dst, temp1, temp2, false);
2521   return dst;
2522 }
2523 
uabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2524 LogicVRegister Simulator::uabdl2(VectorFormat vform, LogicVRegister dst,
2525                                  const LogicVRegister& src1,
2526                                  const LogicVRegister& src2) {
2527   SimVRegister temp1, temp2;
2528   uxtl2(vform, temp1, src1);
2529   uxtl2(vform, temp2, src2);
2530   AbsDiff(vform, dst, temp1, temp2, false);
2531   return dst;
2532 }
2533 
sabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2534 LogicVRegister Simulator::sabdl(VectorFormat vform, LogicVRegister dst,
2535                                 const LogicVRegister& src1,
2536                                 const LogicVRegister& src2) {
2537   SimVRegister temp1, temp2;
2538   sxtl(vform, temp1, src1);
2539   sxtl(vform, temp2, src2);
2540   AbsDiff(vform, dst, temp1, temp2, true);
2541   return dst;
2542 }
2543 
sabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2544 LogicVRegister Simulator::sabdl2(VectorFormat vform, LogicVRegister dst,
2545                                  const LogicVRegister& src1,
2546                                  const LogicVRegister& src2) {
2547   SimVRegister temp1, temp2;
2548   sxtl2(vform, temp1, src1);
2549   sxtl2(vform, temp2, src2);
2550   AbsDiff(vform, dst, temp1, temp2, true);
2551   return dst;
2552 }
2553 
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2554 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
2555                                 const LogicVRegister& src1,
2556                                 const LogicVRegister& src2) {
2557   SimVRegister temp1, temp2;
2558   uxtl(vform, temp1, src1);
2559   uxtl(vform, temp2, src2);
2560   mul(vform, dst, temp1, temp2);
2561   return dst;
2562 }
2563 
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2564 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
2565                                  const LogicVRegister& src1,
2566                                  const LogicVRegister& src2) {
2567   SimVRegister temp1, temp2;
2568   uxtl2(vform, temp1, src1);
2569   uxtl2(vform, temp2, src2);
2570   mul(vform, dst, temp1, temp2);
2571   return dst;
2572 }
2573 
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2574 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
2575                                 const LogicVRegister& src1,
2576                                 const LogicVRegister& src2) {
2577   SimVRegister temp1, temp2;
2578   sxtl(vform, temp1, src1);
2579   sxtl(vform, temp2, src2);
2580   mul(vform, dst, temp1, temp2);
2581   return dst;
2582 }
2583 
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2584 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
2585                                  const LogicVRegister& src1,
2586                                  const LogicVRegister& src2) {
2587   SimVRegister temp1, temp2;
2588   sxtl2(vform, temp1, src1);
2589   sxtl2(vform, temp2, src2);
2590   mul(vform, dst, temp1, temp2);
2591   return dst;
2592 }
2593 
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2594 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
2595                                 const LogicVRegister& src1,
2596                                 const LogicVRegister& src2) {
2597   SimVRegister temp1, temp2;
2598   uxtl(vform, temp1, src1);
2599   uxtl(vform, temp2, src2);
2600   mls(vform, dst, temp1, temp2);
2601   return dst;
2602 }
2603 
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2604 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
2605                                  const LogicVRegister& src1,
2606                                  const LogicVRegister& src2) {
2607   SimVRegister temp1, temp2;
2608   uxtl2(vform, temp1, src1);
2609   uxtl2(vform, temp2, src2);
2610   mls(vform, dst, temp1, temp2);
2611   return dst;
2612 }
2613 
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2614 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
2615                                 const LogicVRegister& src1,
2616                                 const LogicVRegister& src2) {
2617   SimVRegister temp1, temp2;
2618   sxtl(vform, temp1, src1);
2619   sxtl(vform, temp2, src2);
2620   mls(vform, dst, temp1, temp2);
2621   return dst;
2622 }
2623 
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2624 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
2625                                  const LogicVRegister& src1,
2626                                  const LogicVRegister& src2) {
2627   SimVRegister temp1, temp2;
2628   sxtl2(vform, temp1, src1);
2629   sxtl2(vform, temp2, src2);
2630   mls(vform, dst, temp1, temp2);
2631   return dst;
2632 }
2633 
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2634 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
2635                                 const LogicVRegister& src1,
2636                                 const LogicVRegister& src2) {
2637   SimVRegister temp1, temp2;
2638   uxtl(vform, temp1, src1);
2639   uxtl(vform, temp2, src2);
2640   mla(vform, dst, temp1, temp2);
2641   return dst;
2642 }
2643 
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2644 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
2645                                  const LogicVRegister& src1,
2646                                  const LogicVRegister& src2) {
2647   SimVRegister temp1, temp2;
2648   uxtl2(vform, temp1, src1);
2649   uxtl2(vform, temp2, src2);
2650   mla(vform, dst, temp1, temp2);
2651   return dst;
2652 }
2653 
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2654 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
2655                                 const LogicVRegister& src1,
2656                                 const LogicVRegister& src2) {
2657   SimVRegister temp1, temp2;
2658   sxtl(vform, temp1, src1);
2659   sxtl(vform, temp2, src2);
2660   mla(vform, dst, temp1, temp2);
2661   return dst;
2662 }
2663 
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2664 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
2665                                  const LogicVRegister& src1,
2666                                  const LogicVRegister& src2) {
2667   SimVRegister temp1, temp2;
2668   sxtl2(vform, temp1, src1);
2669   sxtl2(vform, temp2, src2);
2670   mla(vform, dst, temp1, temp2);
2671   return dst;
2672 }
2673 
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2674 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
2675                                   const LogicVRegister& src1,
2676                                   const LogicVRegister& src2) {
2677   SimVRegister temp;
2678   LogicVRegister product = sqdmull(vform, temp, src1, src2);
2679   return add(vform, dst, dst, product).SignedSaturate(vform);
2680 }
2681 
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2682 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
2683                                    const LogicVRegister& src1,
2684                                    const LogicVRegister& src2) {
2685   SimVRegister temp;
2686   LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2687   return add(vform, dst, dst, product).SignedSaturate(vform);
2688 }
2689 
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2690 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
2691                                   const LogicVRegister& src1,
2692                                   const LogicVRegister& src2) {
2693   SimVRegister temp;
2694   LogicVRegister product = sqdmull(vform, temp, src1, src2);
2695   return sub(vform, dst, dst, product).SignedSaturate(vform);
2696 }
2697 
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2698 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
2699                                    const LogicVRegister& src1,
2700                                    const LogicVRegister& src2) {
2701   SimVRegister temp;
2702   LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2703   return sub(vform, dst, dst, product).SignedSaturate(vform);
2704 }
2705 
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2706 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
2707                                   const LogicVRegister& src1,
2708                                   const LogicVRegister& src2) {
2709   SimVRegister temp;
2710   LogicVRegister product = smull(vform, temp, src1, src2);
2711   return add(vform, dst, product, product).SignedSaturate(vform);
2712 }
2713 
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2714 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
2715                                    const LogicVRegister& src1,
2716                                    const LogicVRegister& src2) {
2717   SimVRegister temp;
2718   LogicVRegister product = smull2(vform, temp, src1, src2);
2719   return add(vform, dst, product, product).SignedSaturate(vform);
2720 }
2721 
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)2722 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
2723                                    const LogicVRegister& src1,
2724                                    const LogicVRegister& src2, bool round) {
2725   // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
2726   // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
2727   // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
2728 
2729   int esize = LaneSizeInBitsFromFormat(vform);
2730   int round_const = round ? (1 << (esize - 2)) : 0;
2731   int64_t product;
2732 
2733   dst.ClearForWrite(vform);
2734   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2735     product = src1.Int(vform, i) * src2.Int(vform, i);
2736     product += round_const;
2737     product = product >> (esize - 1);
2738 
2739     if (product > MaxIntFromFormat(vform)) {
2740       product = MaxIntFromFormat(vform);
2741     } else if (product < MinIntFromFormat(vform)) {
2742       product = MinIntFromFormat(vform);
2743     }
2744     dst.SetInt(vform, i, product);
2745   }
2746   return dst;
2747 }
2748 
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2749 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
2750                                   const LogicVRegister& src1,
2751                                   const LogicVRegister& src2) {
2752   return sqrdmulh(vform, dst, src1, src2, false);
2753 }
2754 
addhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2755 LogicVRegister Simulator::addhn(VectorFormat vform, LogicVRegister dst,
2756                                 const LogicVRegister& src1,
2757                                 const LogicVRegister& src2) {
2758   SimVRegister temp;
2759   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
2760   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2761   return dst;
2762 }
2763 
addhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2764 LogicVRegister Simulator::addhn2(VectorFormat vform, LogicVRegister dst,
2765                                  const LogicVRegister& src1,
2766                                  const LogicVRegister& src2) {
2767   SimVRegister temp;
2768   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2769   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2770   return dst;
2771 }
2772 
raddhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2773 LogicVRegister Simulator::raddhn(VectorFormat vform, LogicVRegister dst,
2774                                  const LogicVRegister& src1,
2775                                  const LogicVRegister& src2) {
2776   SimVRegister temp;
2777   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
2778   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2779   return dst;
2780 }
2781 
raddhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2782 LogicVRegister Simulator::raddhn2(VectorFormat vform, LogicVRegister dst,
2783                                   const LogicVRegister& src1,
2784                                   const LogicVRegister& src2) {
2785   SimVRegister temp;
2786   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2787   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2788   return dst;
2789 }
2790 
subhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2791 LogicVRegister Simulator::subhn(VectorFormat vform, LogicVRegister dst,
2792                                 const LogicVRegister& src1,
2793                                 const LogicVRegister& src2) {
2794   SimVRegister temp;
2795   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
2796   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2797   return dst;
2798 }
2799 
subhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2800 LogicVRegister Simulator::subhn2(VectorFormat vform, LogicVRegister dst,
2801                                  const LogicVRegister& src1,
2802                                  const LogicVRegister& src2) {
2803   SimVRegister temp;
2804   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2805   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2806   return dst;
2807 }
2808 
rsubhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2809 LogicVRegister Simulator::rsubhn(VectorFormat vform, LogicVRegister dst,
2810                                  const LogicVRegister& src1,
2811                                  const LogicVRegister& src2) {
2812   SimVRegister temp;
2813   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
2814   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2815   return dst;
2816 }
2817 
rsubhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2818 LogicVRegister Simulator::rsubhn2(VectorFormat vform, LogicVRegister dst,
2819                                   const LogicVRegister& src1,
2820                                   const LogicVRegister& src2) {
2821   SimVRegister temp;
2822   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2823   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2824   return dst;
2825 }
2826 
trn1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2827 LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst,
2828                                const LogicVRegister& src1,
2829                                const LogicVRegister& src2) {
2830   uint64_t result[16];
2831   int laneCount = LaneCountFromFormat(vform);
2832   int pairs = laneCount / 2;
2833   for (int i = 0; i < pairs; ++i) {
2834     result[2 * i] = src1.Uint(vform, 2 * i);
2835     result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
2836   }
2837 
2838   dst.SetUintArray(vform, result);
2839   return dst;
2840 }
2841 
trn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2842 LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst,
2843                                const LogicVRegister& src1,
2844                                const LogicVRegister& src2) {
2845   uint64_t result[16];
2846   int laneCount = LaneCountFromFormat(vform);
2847   int pairs = laneCount / 2;
2848   for (int i = 0; i < pairs; ++i) {
2849     result[2 * i] = src1.Uint(vform, (2 * i) + 1);
2850     result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
2851   }
2852 
2853   dst.SetUintArray(vform, result);
2854   return dst;
2855 }
2856 
zip1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2857 LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst,
2858                                const LogicVRegister& src1,
2859                                const LogicVRegister& src2) {
2860   uint64_t result[16];
2861   int laneCount = LaneCountFromFormat(vform);
2862   int pairs = laneCount / 2;
2863   for (int i = 0; i < pairs; ++i) {
2864     result[2 * i] = src1.Uint(vform, i);
2865     result[(2 * i) + 1] = src2.Uint(vform, i);
2866   }
2867 
2868   dst.SetUintArray(vform, result);
2869   return dst;
2870 }
2871 
zip2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2872 LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst,
2873                                const LogicVRegister& src1,
2874                                const LogicVRegister& src2) {
2875   uint64_t result[16];
2876   int laneCount = LaneCountFromFormat(vform);
2877   int pairs = laneCount / 2;
2878   for (int i = 0; i < pairs; ++i) {
2879     result[2 * i] = src1.Uint(vform, pairs + i);
2880     result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
2881   }
2882 
2883   dst.SetUintArray(vform, result);
2884   return dst;
2885 }
2886 
uzp1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2887 LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst,
2888                                const LogicVRegister& src1,
2889                                const LogicVRegister& src2) {
2890   uint64_t result[32];
2891   int laneCount = LaneCountFromFormat(vform);
2892   for (int i = 0; i < laneCount; ++i) {
2893     result[i] = src1.Uint(vform, i);
2894     result[laneCount + i] = src2.Uint(vform, i);
2895   }
2896 
2897   dst.ClearForWrite(vform);
2898   for (int i = 0; i < laneCount; ++i) {
2899     dst.SetUint(vform, i, result[2 * i]);
2900   }
2901   return dst;
2902 }
2903 
uzp2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2904 LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst,
2905                                const LogicVRegister& src1,
2906                                const LogicVRegister& src2) {
2907   uint64_t result[32];
2908   int laneCount = LaneCountFromFormat(vform);
2909   for (int i = 0; i < laneCount; ++i) {
2910     result[i] = src1.Uint(vform, i);
2911     result[laneCount + i] = src2.Uint(vform, i);
2912   }
2913 
2914   dst.ClearForWrite(vform);
2915   for (int i = 0; i < laneCount; ++i) {
2916     dst.SetUint(vform, i, result[(2 * i) + 1]);
2917   }
2918   return dst;
2919 }
2920 
2921 template <typename T>
FPAdd(T op1,T op2)2922 T Simulator::FPAdd(T op1, T op2) {
2923   T result = FPProcessNaNs(op1, op2);
2924   if (std::isnan(result)) return result;
2925 
2926   if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
2927     // inf + -inf returns the default NaN.
2928     FPProcessException();
2929     return FPDefaultNaN<T>();
2930   } else {
2931     // Other cases should be handled by standard arithmetic.
2932     return op1 + op2;
2933   }
2934 }
2935 
2936 template <typename T>
FPSub(T op1,T op2)2937 T Simulator::FPSub(T op1, T op2) {
2938   // NaNs should be handled elsewhere.
2939   DCHECK(!std::isnan(op1) && !std::isnan(op2));
2940 
2941   if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
2942     // inf - inf returns the default NaN.
2943     FPProcessException();
2944     return FPDefaultNaN<T>();
2945   } else {
2946     // Other cases should be handled by standard arithmetic.
2947     return op1 - op2;
2948   }
2949 }
2950 
2951 template <typename T>
FPMul(T op1,T op2)2952 T Simulator::FPMul(T op1, T op2) {
2953   // NaNs should be handled elsewhere.
2954   DCHECK(!std::isnan(op1) && !std::isnan(op2));
2955 
2956   if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
2957     // inf * 0.0 returns the default NaN.
2958     FPProcessException();
2959     return FPDefaultNaN<T>();
2960   } else {
2961     // Other cases should be handled by standard arithmetic.
2962     return op1 * op2;
2963   }
2964 }
2965 
2966 template <typename T>
FPMulx(T op1,T op2)2967 T Simulator::FPMulx(T op1, T op2) {
2968   if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
2969     // inf * 0.0 returns +/-2.0.
2970     T two = 2.0;
2971     return copysign(1.0, op1) * copysign(1.0, op2) * two;
2972   }
2973   return FPMul(op1, op2);
2974 }
2975 
2976 template <typename T>
FPMulAdd(T a,T op1,T op2)2977 T Simulator::FPMulAdd(T a, T op1, T op2) {
2978   T result = FPProcessNaNs3(a, op1, op2);
2979 
2980   T sign_a = copysign(1.0, a);
2981   T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
2982   bool isinf_prod = std::isinf(op1) || std::isinf(op2);
2983   bool operation_generates_nan =
2984       (std::isinf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
2985       (std::isinf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
2986       (std::isinf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf
2987 
2988   if (std::isnan(result)) {
2989     // Generated NaNs override quiet NaNs propagated from a.
2990     if (operation_generates_nan && IsQuietNaN(a)) {
2991       FPProcessException();
2992       return FPDefaultNaN<T>();
2993     } else {
2994       return result;
2995     }
2996   }
2997 
2998   // If the operation would produce a NaN, return the default NaN.
2999   if (operation_generates_nan) {
3000     FPProcessException();
3001     return FPDefaultNaN<T>();
3002   }
3003 
3004   // Work around broken fma implementations for exact zero results: The sign of
3005   // exact 0.0 results is positive unless both a and op1 * op2 are negative.
3006   if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
3007     return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
3008   }
3009 
3010   result = FusedMultiplyAdd(op1, op2, a);
3011   DCHECK(!std::isnan(result));
3012 
3013   // Work around broken fma implementations for rounded zero results: If a is
3014   // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
3015   if ((a == 0.0) && (result == 0.0)) {
3016     return copysign(0.0, sign_prod);
3017   }
3018 
3019   return result;
3020 }
3021 
3022 template <typename T>
FPDiv(T op1,T op2)3023 T Simulator::FPDiv(T op1, T op2) {
3024   // NaNs should be handled elsewhere.
3025   DCHECK(!std::isnan(op1) && !std::isnan(op2));
3026 
3027   if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
3028     // inf / inf and 0.0 / 0.0 return the default NaN.
3029     FPProcessException();
3030     return FPDefaultNaN<T>();
3031   } else {
3032     if (op2 == 0.0) {
3033       FPProcessException();
3034       if (!std::isnan(op1)) {
3035         double op1_sign = copysign(1.0, op1);
3036         double op2_sign = copysign(1.0, op2);
3037         return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
3038       }
3039     }
3040 
3041     // Other cases should be handled by standard arithmetic.
3042     return op1 / op2;
3043   }
3044 }
3045 
3046 template <typename T>
FPSqrt(T op)3047 T Simulator::FPSqrt(T op) {
3048   if (std::isnan(op)) {
3049     return FPProcessNaN(op);
3050   } else if (op < 0.0) {
3051     FPProcessException();
3052     return FPDefaultNaN<T>();
3053   } else {
3054     return sqrt(op);
3055   }
3056 }
3057 
3058 template <typename T>
FPMax(T a,T b)3059 T Simulator::FPMax(T a, T b) {
3060   T result = FPProcessNaNs(a, b);
3061   if (std::isnan(result)) return result;
3062 
3063   if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3064     // a and b are zero, and the sign differs: return +0.0.
3065     return 0.0;
3066   } else {
3067     return (a > b) ? a : b;
3068   }
3069 }
3070 
3071 template <typename T>
FPMaxNM(T a,T b)3072 T Simulator::FPMaxNM(T a, T b) {
3073   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3074     a = kFP64NegativeInfinity;
3075   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3076     b = kFP64NegativeInfinity;
3077   }
3078 
3079   T result = FPProcessNaNs(a, b);
3080   return std::isnan(result) ? result : FPMax(a, b);
3081 }
3082 
3083 template <typename T>
FPMin(T a,T b)3084 T Simulator::FPMin(T a, T b) {
3085   T result = FPProcessNaNs(a, b);
3086   if (std::isnan(result)) return result;
3087 
3088   if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3089     // a and b are zero, and the sign differs: return -0.0.
3090     return -0.0;
3091   } else {
3092     return (a < b) ? a : b;
3093   }
3094 }
3095 
3096 template <typename T>
FPMinNM(T a,T b)3097 T Simulator::FPMinNM(T a, T b) {
3098   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3099     a = kFP64PositiveInfinity;
3100   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3101     b = kFP64PositiveInfinity;
3102   }
3103 
3104   T result = FPProcessNaNs(a, b);
3105   return std::isnan(result) ? result : FPMin(a, b);
3106 }
3107 
3108 template <typename T>
FPRecipStepFused(T op1,T op2)3109 T Simulator::FPRecipStepFused(T op1, T op2) {
3110   const T two = 2.0;
3111   if ((std::isinf(op1) && (op2 == 0.0)) ||
3112       ((op1 == 0.0) && (std::isinf(op2)))) {
3113     return two;
3114   } else if (std::isinf(op1) || std::isinf(op2)) {
3115     // Return +inf if signs match, otherwise -inf.
3116     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3117                                           : kFP64NegativeInfinity;
3118   } else {
3119     return FusedMultiplyAdd(op1, op2, two);
3120   }
3121 }
3122 
3123 template <typename T>
FPRSqrtStepFused(T op1,T op2)3124 T Simulator::FPRSqrtStepFused(T op1, T op2) {
3125   const T one_point_five = 1.5;
3126   const T two = 2.0;
3127 
3128   if ((std::isinf(op1) && (op2 == 0.0)) ||
3129       ((op1 == 0.0) && (std::isinf(op2)))) {
3130     return one_point_five;
3131   } else if (std::isinf(op1) || std::isinf(op2)) {
3132     // Return +inf if signs match, otherwise -inf.
3133     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3134                                           : kFP64NegativeInfinity;
3135   } else {
3136     // The multiply-add-halve operation must be fully fused, so avoid interim
3137     // rounding by checking which operand can be losslessly divided by two
3138     // before doing the multiply-add.
3139     if (std::isnormal(op1 / two)) {
3140       return FusedMultiplyAdd(op1 / two, op2, one_point_five);
3141     } else if (std::isnormal(op2 / two)) {
3142       return FusedMultiplyAdd(op1, op2 / two, one_point_five);
3143     } else {
3144       // Neither operand is normal after halving: the result is dominated by
3145       // the addition term, so just return that.
3146       return one_point_five;
3147     }
3148   }
3149 }
3150 
FPRoundInt(double value,FPRounding round_mode)3151 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
3152   if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3153       (value == kFP64NegativeInfinity)) {
3154     return value;
3155   } else if (std::isnan(value)) {
3156     return FPProcessNaN(value);
3157   }
3158 
3159   double int_result = std::floor(value);
3160   double error = value - int_result;
3161   switch (round_mode) {
3162     case FPTieAway: {
3163       // Take care of correctly handling the range ]-0.5, -0.0], which must
3164       // yield -0.0.
3165       if ((-0.5 < value) && (value < 0.0)) {
3166         int_result = -0.0;
3167 
3168       } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
3169         // If the error is greater than 0.5, or is equal to 0.5 and the integer
3170         // result is positive, round up.
3171         int_result++;
3172       }
3173       break;
3174     }
3175     case FPTieEven: {
3176       // Take care of correctly handling the range [-0.5, -0.0], which must
3177       // yield -0.0.
3178       if ((-0.5 <= value) && (value < 0.0)) {
3179         int_result = -0.0;
3180 
3181         // If the error is greater than 0.5, or is equal to 0.5 and the integer
3182         // result is odd, round up.
3183       } else if ((error > 0.5) ||
3184                  ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
3185         int_result++;
3186       }
3187       break;
3188     }
3189     case FPZero: {
3190       // If value>0 then we take floor(value)
3191       // otherwise, ceil(value).
3192       if (value < 0) {
3193         int_result = ceil(value);
3194       }
3195       break;
3196     }
3197     case FPNegativeInfinity: {
3198       // We always use floor(value).
3199       break;
3200     }
3201     case FPPositiveInfinity: {
3202       // Take care of correctly handling the range ]-1.0, -0.0], which must
3203       // yield -0.0.
3204       if ((-1.0 < value) && (value < 0.0)) {
3205         int_result = -0.0;
3206 
3207         // If the error is non-zero, round up.
3208       } else if (error > 0.0) {
3209         int_result++;
3210       }
3211       break;
3212     }
3213     default:
3214       UNIMPLEMENTED();
3215   }
3216   return int_result;
3217 }
3218 
FPToInt32(double value,FPRounding rmode)3219 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
3220   value = FPRoundInt(value, rmode);
3221   if (value >= kWMaxInt) {
3222     return kWMaxInt;
3223   } else if (value < kWMinInt) {
3224     return kWMinInt;
3225   }
3226   return std::isnan(value) ? 0 : static_cast<int32_t>(value);
3227 }
3228 
FPToInt64(double value,FPRounding rmode)3229 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
3230   value = FPRoundInt(value, rmode);
3231   if (value >= kXMaxInt) {
3232     return kXMaxInt;
3233   } else if (value < kXMinInt) {
3234     return kXMinInt;
3235   }
3236   return std::isnan(value) ? 0 : static_cast<int64_t>(value);
3237 }
3238 
FPToUInt32(double value,FPRounding rmode)3239 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
3240   value = FPRoundInt(value, rmode);
3241   if (value >= kWMaxUInt) {
3242     return kWMaxUInt;
3243   } else if (value < 0.0) {
3244     return 0;
3245   }
3246   return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
3247 }
3248 
FPToUInt64(double value,FPRounding rmode)3249 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
3250   value = FPRoundInt(value, rmode);
3251   if (value >= kXMaxUInt) {
3252     return kXMaxUInt;
3253   } else if (value < 0.0) {
3254     return 0;
3255   }
3256   return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
3257 }
3258 
// For each entry of NEON_FP3SAME_LIST, defines two Simulator::FN overloads:
//  - a template that applies OP lane by lane; when PROCNAN is set, the
//    operands first go through FPProcessNaNs() and OP is skipped for lanes
//    where that yields a NaN;
//  - a non-template dispatcher that selects the float or double
//    instantiation from the lane size of vform.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                      \
  template <typename T>                                                \
  LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
                               const LogicVRegister& src1,             \
                               const LogicVRegister& src2) {           \
    dst.ClearForWrite(vform);                                          \
    for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {    \
      const T lhs = src1.Float<T>(lane);                               \
      const T rhs = src2.Float<T>(lane);                               \
      bool nan_propagated = false;                                     \
      T value = T();                                                   \
      if (PROCNAN) {                                                   \
        value = FPProcessNaNs(lhs, rhs);                               \
        nan_propagated = std::isnan(value);                            \
      }                                                                \
      if (!nan_propagated) {                                           \
        value = OP(lhs, rhs);                                          \
      }                                                                \
      dst.SetFloat(lane, value);                                       \
    }                                                                  \
    return dst;                                                        \
  }                                                                    \
                                                                       \
  LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
                               const LogicVRegister& src1,             \
                               const LogicVRegister& src2) {           \
    if (LaneSizeInBytesFromFormat(vform) != kSRegSize) {               \
      DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);          \
      FN<double>(vform, dst, src1, src2);                              \
    } else {                                                           \
      FN<float>(vform, dst, src1, src2);                               \
    }                                                                  \
    return dst;                                                        \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
3295 
3296 LogicVRegister Simulator::fnmul(VectorFormat vform, LogicVRegister dst,
3297                                 const LogicVRegister& src1,
3298                                 const LogicVRegister& src2) {
3299   SimVRegister temp;
3300   LogicVRegister product = fmul(vform, temp, src1, src2);
3301   return fneg(vform, dst, product);
3302 }
3303 
3304 template <typename T>
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3305 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
3306                                  const LogicVRegister& src1,
3307                                  const LogicVRegister& src2) {
3308   dst.ClearForWrite(vform);
3309   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3310     T op1 = -src1.Float<T>(i);
3311     T op2 = src2.Float<T>(i);
3312     T result = FPProcessNaNs(op1, op2);
3313     dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
3314   }
3315   return dst;
3316 }
3317 
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3318 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
3319                                  const LogicVRegister& src1,
3320                                  const LogicVRegister& src2) {
3321   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3322     frecps<float>(vform, dst, src1, src2);
3323   } else {
3324     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3325     frecps<double>(vform, dst, src1, src2);
3326   }
3327   return dst;
3328 }
3329 
3330 template <typename T>
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3331 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
3332                                   const LogicVRegister& src1,
3333                                   const LogicVRegister& src2) {
3334   dst.ClearForWrite(vform);
3335   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3336     T op1 = -src1.Float<T>(i);
3337     T op2 = src2.Float<T>(i);
3338     T result = FPProcessNaNs(op1, op2);
3339     dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
3340   }
3341   return dst;
3342 }
3343 
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3344 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
3345                                   const LogicVRegister& src1,
3346                                   const LogicVRegister& src2) {
3347   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3348     frsqrts<float>(vform, dst, src1, src2);
3349   } else {
3350     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3351     frsqrts<double>(vform, dst, src1, src2);
3352   }
3353   return dst;
3354 }
3355 
3356 template <typename T>
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)3357 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
3358                                const LogicVRegister& src1,
3359                                const LogicVRegister& src2, Condition cond) {
3360   dst.ClearForWrite(vform);
3361   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3362     bool result = false;
3363     T op1 = src1.Float<T>(i);
3364     T op2 = src2.Float<T>(i);
3365     T nan_result = FPProcessNaNs(op1, op2);
3366     if (!std::isnan(nan_result)) {
3367       switch (cond) {
3368         case eq:
3369           result = (op1 == op2);
3370           break;
3371         case ge:
3372           result = (op1 >= op2);
3373           break;
3374         case gt:
3375           result = (op1 > op2);
3376           break;
3377         case le:
3378           result = (op1 <= op2);
3379           break;
3380         case lt:
3381           result = (op1 < op2);
3382           break;
3383         default:
3384           UNREACHABLE();
3385       }
3386     }
3387     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
3388   }
3389   return dst;
3390 }
3391 
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)3392 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
3393                                const LogicVRegister& src1,
3394                                const LogicVRegister& src2, Condition cond) {
3395   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3396     fcmp<float>(vform, dst, src1, src2, cond);
3397   } else {
3398     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3399     fcmp<double>(vform, dst, src1, src2, cond);
3400   }
3401   return dst;
3402 }
3403 
fcmp_zero(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,Condition cond)3404 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, LogicVRegister dst,
3405                                     const LogicVRegister& src, Condition cond) {
3406   SimVRegister temp;
3407   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3408     LogicVRegister zero_reg =
3409         dup_immediate(vform, temp, bit_cast<uint32_t>(0.0f));
3410     fcmp<float>(vform, dst, src, zero_reg, cond);
3411   } else {
3412     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3413     LogicVRegister zero_reg =
3414         dup_immediate(vform, temp, bit_cast<uint64_t>(0.0));
3415     fcmp<double>(vform, dst, src, zero_reg, cond);
3416   }
3417   return dst;
3418 }
3419 
fabscmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)3420 LogicVRegister Simulator::fabscmp(VectorFormat vform, LogicVRegister dst,
3421                                   const LogicVRegister& src1,
3422                                   const LogicVRegister& src2, Condition cond) {
3423   SimVRegister temp1, temp2;
3424   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3425     LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
3426     LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
3427     fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
3428   } else {
3429     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3430     LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
3431     LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
3432     fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
3433   }
3434   return dst;
3435 }
3436 
3437 template <typename T>
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3438 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3439                                const LogicVRegister& src1,
3440                                const LogicVRegister& src2) {
3441   dst.ClearForWrite(vform);
3442   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3443     T op1 = src1.Float<T>(i);
3444     T op2 = src2.Float<T>(i);
3445     T acc = dst.Float<T>(i);
3446     T result = FPMulAdd(acc, op1, op2);
3447     dst.SetFloat(i, result);
3448   }
3449   return dst;
3450 }
3451 
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3452 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3453                                const LogicVRegister& src1,
3454                                const LogicVRegister& src2) {
3455   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3456     fmla<float>(vform, dst, src1, src2);
3457   } else {
3458     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3459     fmla<double>(vform, dst, src1, src2);
3460   }
3461   return dst;
3462 }
3463 
3464 template <typename T>
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3465 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3466                                const LogicVRegister& src1,
3467                                const LogicVRegister& src2) {
3468   dst.ClearForWrite(vform);
3469   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3470     T op1 = -src1.Float<T>(i);
3471     T op2 = src2.Float<T>(i);
3472     T acc = dst.Float<T>(i);
3473     T result = FPMulAdd(acc, op1, op2);
3474     dst.SetFloat(i, result);
3475   }
3476   return dst;
3477 }
3478 
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3479 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3480                                const LogicVRegister& src1,
3481                                const LogicVRegister& src2) {
3482   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3483     fmls<float>(vform, dst, src1, src2);
3484   } else {
3485     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3486     fmls<double>(vform, dst, src1, src2);
3487   }
3488   return dst;
3489 }
3490 
3491 template <typename T>
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3492 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
3493                                const LogicVRegister& src) {
3494   dst.ClearForWrite(vform);
3495   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3496     T op = src.Float<T>(i);
3497     op = -op;
3498     dst.SetFloat(i, op);
3499   }
3500   return dst;
3501 }
3502 
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3503 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
3504                                const LogicVRegister& src) {
3505   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3506     fneg<float>(vform, dst, src);
3507   } else {
3508     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3509     fneg<double>(vform, dst, src);
3510   }
3511   return dst;
3512 }
3513 
3514 template <typename T>
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3515 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
3516                                 const LogicVRegister& src) {
3517   dst.ClearForWrite(vform);
3518   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3519     T op = src.Float<T>(i);
3520     if (copysign(1.0, op) < 0.0) {
3521       op = -op;
3522     }
3523     dst.SetFloat(i, op);
3524   }
3525   return dst;
3526 }
3527 
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3528 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
3529                                 const LogicVRegister& src) {
3530   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3531     fabs_<float>(vform, dst, src);
3532   } else {
3533     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3534     fabs_<double>(vform, dst, src);
3535   }
3536   return dst;
3537 }
3538 
fabd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3539 LogicVRegister Simulator::fabd(VectorFormat vform, LogicVRegister dst,
3540                                const LogicVRegister& src1,
3541                                const LogicVRegister& src2) {
3542   SimVRegister temp;
3543   fsub(vform, temp, src1, src2);
3544   fabs_(vform, dst, temp);
3545   return dst;
3546 }
3547 
fsqrt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3548 LogicVRegister Simulator::fsqrt(VectorFormat vform, LogicVRegister dst,
3549                                 const LogicVRegister& src) {
3550   dst.ClearForWrite(vform);
3551   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3552     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3553       float result = FPSqrt(src.Float<float>(i));
3554       dst.SetFloat(i, result);
3555     }
3556   } else {
3557     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3558     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3559       double result = FPSqrt(src.Float<double>(i));
3560       dst.SetFloat(i, result);
3561     }
3562   }
3563   return dst;
3564 }
3565 
// Stamps out the two overloads of each pairwise floating-point operation
// listed in NEON_FPPAIRWISE_LIST:
//  - FNP(vform, dst, src1, src2): uzp1 and uzp2 each fill a temporary from
//    src1 and src2, and FN is then applied to those two temporaries.
//  - FNP(vform, dst, src): scalar form for kFormatS / kFormatD. OP is applied
//    to lanes 0 and 1 of src, the result goes to lane 0 of dst, and
//    ClearForWrite(vform) is called afterwards.
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                             \
  LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
                                const LogicVRegister& src1,             \
                                const LogicVRegister& src2) {           \
    SimVRegister uzp1_out, uzp2_out;                                    \
    uzp1(vform, uzp1_out, src1, src2);                                  \
    uzp2(vform, uzp2_out, src1, src2);                                  \
    FN(vform, dst, uzp1_out, uzp2_out);                                 \
    return dst;                                                         \
  }                                                                     \
                                                                        \
  LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
                                const LogicVRegister& src) {            \
    if (vform != kFormatS) {                                            \
      DCHECK_EQ(vform, kFormatD);                                       \
      double combined = OP(src.Float<double>(0), src.Float<double>(1)); \
      dst.SetFloat(0, combined);                                        \
    } else {                                                            \
      float combined = OP(src.Float<float>(0), src.Float<float>(1));    \
      dst.SetFloat(0, combined);                                        \
    }                                                                   \
    dst.ClearForWrite(vform);                                           \
    return dst;                                                         \
  }
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
3592 
3593 LogicVRegister Simulator::FMinMaxV(VectorFormat vform, LogicVRegister dst,
3594                                    const LogicVRegister& src, FPMinMaxOp Op) {
3595   DCHECK_EQ(vform, kFormat4S);
3596   USE(vform);
3597   float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
3598   float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
3599   float result = (this->*Op)(result1, result2);
3600   dst.ClearForWrite(kFormatS);
3601   dst.SetFloat<float>(0, result);
3602   return dst;
3603 }
3604 
fmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3605 LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst,
3606                                 const LogicVRegister& src) {
3607   return FMinMaxV(vform, dst, src, &Simulator::FPMax);
3608 }
3609 
fminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3610 LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst,
3611                                 const LogicVRegister& src) {
3612   return FMinMaxV(vform, dst, src, &Simulator::FPMin);
3613 }
3614 
fmaxnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3615 LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst,
3616                                   const LogicVRegister& src) {
3617   return FMinMaxV(vform, dst, src, &Simulator::FPMaxNM);
3618 }
3619 
fminnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3620 LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst,
3621                                   const LogicVRegister& src) {
3622   return FMinMaxV(vform, dst, src, &Simulator::FPMinNM);
3623 }
3624 
fmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)3625 LogicVRegister Simulator::fmul(VectorFormat vform, LogicVRegister dst,
3626                                const LogicVRegister& src1,
3627                                const LogicVRegister& src2, int index) {
3628   dst.ClearForWrite(vform);
3629   SimVRegister temp;
3630   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3631     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3632     fmul<float>(vform, dst, src1, index_reg);
3633   } else {
3634     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3635     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3636     fmul<double>(vform, dst, src1, index_reg);
3637   }
3638   return dst;
3639 }
3640 
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)3641 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3642                                const LogicVRegister& src1,
3643                                const LogicVRegister& src2, int index) {
3644   dst.ClearForWrite(vform);
3645   SimVRegister temp;
3646   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3647     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3648     fmla<float>(vform, dst, src1, index_reg);
3649   } else {
3650     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3651     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3652     fmla<double>(vform, dst, src1, index_reg);
3653   }
3654   return dst;
3655 }
3656 
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)3657 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3658                                const LogicVRegister& src1,
3659                                const LogicVRegister& src2, int index) {
3660   dst.ClearForWrite(vform);
3661   SimVRegister temp;
3662   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3663     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3664     fmls<float>(vform, dst, src1, index_reg);
3665   } else {
3666     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3667     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3668     fmls<double>(vform, dst, src1, index_reg);
3669   }
3670   return dst;
3671 }
3672 
fmulx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)3673 LogicVRegister Simulator::fmulx(VectorFormat vform, LogicVRegister dst,
3674                                 const LogicVRegister& src1,
3675                                 const LogicVRegister& src2, int index) {
3676   dst.ClearForWrite(vform);
3677   SimVRegister temp;
3678   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3679     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3680     fmulx<float>(vform, dst, src1, index_reg);
3681 
3682   } else {
3683     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3684     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3685     fmulx<double>(vform, dst, src1, index_reg);
3686   }
3687   return dst;
3688 }
3689 
frint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,bool inexact_exception)3690 LogicVRegister Simulator::frint(VectorFormat vform, LogicVRegister dst,
3691                                 const LogicVRegister& src,
3692                                 FPRounding rounding_mode,
3693                                 bool inexact_exception) {
3694   dst.ClearForWrite(vform);
3695   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3696     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3697       float input = src.Float<float>(i);
3698       float rounded = FPRoundInt(input, rounding_mode);
3699       if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3700         FPProcessException();
3701       }
3702       dst.SetFloat<float>(i, rounded);
3703     }
3704   } else {
3705     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3706     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3707       double input = src.Float<double>(i);
3708       double rounded = FPRoundInt(input, rounding_mode);
3709       if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3710         FPProcessException();
3711       }
3712       dst.SetFloat<double>(i, rounded);
3713     }
3714   }
3715   return dst;
3716 }
3717 
fcvts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,int fbits)3718 LogicVRegister Simulator::fcvts(VectorFormat vform, LogicVRegister dst,
3719                                 const LogicVRegister& src,
3720                                 FPRounding rounding_mode, int fbits) {
3721   dst.ClearForWrite(vform);
3722   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3723     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3724       float op = src.Float<float>(i) * std::pow(2.0f, fbits);
3725       dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
3726     }
3727   } else {
3728     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3729     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3730       double op = src.Float<double>(i) * std::pow(2.0, fbits);
3731       dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
3732     }
3733   }
3734   return dst;
3735 }
3736 
fcvtu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,int fbits)3737 LogicVRegister Simulator::fcvtu(VectorFormat vform, LogicVRegister dst,
3738                                 const LogicVRegister& src,
3739                                 FPRounding rounding_mode, int fbits) {
3740   dst.ClearForWrite(vform);
3741   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3742     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3743       float op = src.Float<float>(i) * std::pow(2.0f, fbits);
3744       dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
3745     }
3746   } else {
3747     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3748     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3749       double op = src.Float<double>(i) * std::pow(2.0, fbits);
3750       dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
3751     }
3752   }
3753   return dst;
3754 }
3755 
fcvtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3756 LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst,
3757                                 const LogicVRegister& src) {
3758   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3759     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3760       dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
3761     }
3762   } else {
3763     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3764     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3765       dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
3766     }
3767   }
3768   return dst;
3769 }
3770 
fcvtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3771 LogicVRegister Simulator::fcvtl2(VectorFormat vform, LogicVRegister dst,
3772                                  const LogicVRegister& src) {
3773   int lane_count = LaneCountFromFormat(vform);
3774   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3775     for (int i = 0; i < lane_count; i++) {
3776       dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
3777     }
3778   } else {
3779     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3780     for (int i = 0; i < lane_count; i++) {
3781       dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
3782     }
3783   }
3784   return dst;
3785 }
3786 
fcvtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3787 LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst,
3788                                 const LogicVRegister& src) {
3789   if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3790     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3791       dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
3792     }
3793   } else {
3794     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3795     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3796       dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
3797     }
3798   }
3799   return dst;
3800 }
3801 
fcvtn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3802 LogicVRegister Simulator::fcvtn2(VectorFormat vform, LogicVRegister dst,
3803                                  const LogicVRegister& src) {
3804   int lane_count = LaneCountFromFormat(vform) / 2;
3805   if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3806     for (int i = lane_count - 1; i >= 0; i--) {
3807       dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
3808     }
3809   } else {
3810     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3811     for (int i = lane_count - 1; i >= 0; i--) {
3812       dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
3813     }
3814   }
3815   return dst;
3816 }
3817 
fcvtxn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3818 LogicVRegister Simulator::fcvtxn(VectorFormat vform, LogicVRegister dst,
3819                                  const LogicVRegister& src) {
3820   dst.ClearForWrite(vform);
3821   DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3822   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3823     dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
3824   }
3825   return dst;
3826 }
3827 
fcvtxn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3828 LogicVRegister Simulator::fcvtxn2(VectorFormat vform, LogicVRegister dst,
3829                                   const LogicVRegister& src) {
3830   DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3831   int lane_count = LaneCountFromFormat(vform) / 2;
3832   for (int i = lane_count - 1; i >= 0; i--) {
3833     dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
3834   }
3835   return dst;
3836 }
3837 
3838 // Based on reference C function recip_sqrt_estimate from ARM ARM.
recip_sqrt_estimate(double a)3839 double Simulator::recip_sqrt_estimate(double a) {
3840   int q0, q1, s;
3841   double r;
3842   if (a < 0.5) {
3843     q0 = static_cast<int>(a * 512.0);
3844     r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
3845   } else {
3846     q1 = static_cast<int>(a * 256.0);
3847     r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
3848   }
3849   s = static_cast<int>(256.0 * r + 0.5);
3850   return static_cast<double>(s) / 256.0;
3851 }
3852 
3853 namespace {
3854 
Bits(uint64_t val,int start_bit,int end_bit)3855 inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
3856   return unsigned_bitextract_64(start_bit, end_bit, val);
3857 }
3858 
3859 }  // anonymous namespace
3860 
3861 template <typename T>
FPRecipSqrtEstimate(T op)3862 T Simulator::FPRecipSqrtEstimate(T op) {
3863   static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
3864                 "T must be a float or double");
3865 
3866   if (std::isnan(op)) {
3867     return FPProcessNaN(op);
3868   } else if (op == 0.0) {
3869     if (copysign(1.0, op) < 0.0) {
3870       return kFP64NegativeInfinity;
3871     } else {
3872       return kFP64PositiveInfinity;
3873     }
3874   } else if (copysign(1.0, op) < 0.0) {
3875     FPProcessException();
3876     return FPDefaultNaN<T>();
3877   } else if (std::isinf(op)) {
3878     return 0.0;
3879   } else {
3880     uint64_t fraction;
3881     int32_t exp, result_exp;
3882 
3883     if (sizeof(T) == sizeof(float)) {
3884       exp = static_cast<int32_t>(float_exp(op));
3885       fraction = float_mantissa(op);
3886       fraction <<= 29;
3887     } else {
3888       exp = static_cast<int32_t>(double_exp(op));
3889       fraction = double_mantissa(op);
3890     }
3891 
3892     if (exp == 0) {
3893       while (Bits(fraction, 51, 51) == 0) {
3894         fraction = Bits(fraction, 50, 0) << 1;
3895         exp -= 1;
3896       }
3897       fraction = Bits(fraction, 50, 0) << 1;
3898     }
3899 
3900     double scaled;
3901     if (Bits(exp, 0, 0) == 0) {
3902       scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
3903     } else {
3904       scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44);
3905     }
3906 
3907     if (sizeof(T) == sizeof(float)) {
3908       result_exp = (380 - exp) / 2;
3909     } else {
3910       result_exp = (3068 - exp) / 2;
3911     }
3912 
3913     uint64_t estimate = bit_cast<uint64_t>(recip_sqrt_estimate(scaled));
3914 
3915     if (sizeof(T) == sizeof(float)) {
3916       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
3917       uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
3918       return float_pack(0, exp_bits, est_bits);
3919     } else {
3920       return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
3921     }
3922   }
3923 }
3924 
frsqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3925 LogicVRegister Simulator::frsqrte(VectorFormat vform, LogicVRegister dst,
3926                                   const LogicVRegister& src) {
3927   dst.ClearForWrite(vform);
3928   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3929     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3930       float input = src.Float<float>(i);
3931       dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
3932     }
3933   } else {
3934     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3935     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3936       double input = src.Float<double>(i);
3937       dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
3938     }
3939   }
3940   return dst;
3941 }
3942 
3943 template <typename T>
FPRecipEstimate(T op,FPRounding rounding)3944 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
3945   static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
3946                 "T must be a float or double");
3947   uint32_t sign;
3948 
3949   if (sizeof(T) == sizeof(float)) {
3950     sign = float_sign(op);
3951   } else {
3952     sign = double_sign(op);
3953   }
3954 
3955   if (std::isnan(op)) {
3956     return FPProcessNaN(op);
3957   } else if (std::isinf(op)) {
3958     return (sign == 1) ? -0.0 : 0.0;
3959   } else if (op == 0.0) {
3960     FPProcessException();  // FPExc_DivideByZero exception.
3961     return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
3962   } else if (((sizeof(T) == sizeof(float)) &&
3963               (std::fabs(op) < std::pow(2.0, -128.0))) ||
3964              ((sizeof(T) == sizeof(double)) &&
3965               (std::fabs(op) < std::pow(2.0, -1024.0)))) {
3966     bool overflow_to_inf = false;
3967     switch (rounding) {
3968       case FPTieEven:
3969         overflow_to_inf = true;
3970         break;
3971       case FPPositiveInfinity:
3972         overflow_to_inf = (sign == 0);
3973         break;
3974       case FPNegativeInfinity:
3975         overflow_to_inf = (sign == 1);
3976         break;
3977       case FPZero:
3978         overflow_to_inf = false;
3979         break;
3980       default:
3981         break;
3982     }
3983     FPProcessException();  // FPExc_Overflow and FPExc_Inexact.
3984     if (overflow_to_inf) {
3985       return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
3986     } else {
3987       // Return FPMaxNormal(sign).
3988       if (sizeof(T) == sizeof(float)) {
3989         return float_pack(sign, 0xFE, 0x07FFFFF);
3990       } else {
3991         return double_pack(sign, 0x7FE, 0x0FFFFFFFFFFFFFl);
3992       }
3993     }
3994   } else {
3995     uint64_t fraction;
3996     int32_t exp, result_exp;
3997     uint32_t sign;
3998 
3999     if (sizeof(T) == sizeof(float)) {
4000       sign = float_sign(op);
4001       exp = static_cast<int32_t>(float_exp(op));
4002       fraction = float_mantissa(op);
4003       fraction <<= 29;
4004     } else {
4005       sign = double_sign(op);
4006       exp = static_cast<int32_t>(double_exp(op));
4007       fraction = double_mantissa(op);
4008     }
4009 
4010     if (exp == 0) {
4011       if (Bits(fraction, 51, 51) == 0) {
4012         exp -= 1;
4013         fraction = Bits(fraction, 49, 0) << 2;
4014       } else {
4015         fraction = Bits(fraction, 50, 0) << 1;
4016       }
4017     }
4018 
4019     double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
4020 
4021     if (sizeof(T) == sizeof(float)) {
4022       result_exp = 253 - exp;
4023     } else {
4024       result_exp = 2045 - exp;
4025     }
4026 
4027     double estimate = recip_estimate(scaled);
4028 
4029     fraction = double_mantissa(estimate);
4030     if (result_exp == 0) {
4031       fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
4032     } else if (result_exp == -1) {
4033       fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
4034       result_exp = 0;
4035     }
4036     if (sizeof(T) == sizeof(float)) {
4037       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4038       uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
4039       return float_pack(sign, exp_bits, frac_bits);
4040     } else {
4041       return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
4042     }
4043   }
4044 }
4045 
frecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round)4046 LogicVRegister Simulator::frecpe(VectorFormat vform, LogicVRegister dst,
4047                                  const LogicVRegister& src, FPRounding round) {
4048   dst.ClearForWrite(vform);
4049   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4050     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4051       float input = src.Float<float>(i);
4052       dst.SetFloat(i, FPRecipEstimate<float>(input, round));
4053     }
4054   } else {
4055     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4056     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4057       double input = src.Float<double>(i);
4058       dst.SetFloat(i, FPRecipEstimate<double>(input, round));
4059     }
4060   }
4061   return dst;
4062 }
4063 
ursqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4064 LogicVRegister Simulator::ursqrte(VectorFormat vform, LogicVRegister dst,
4065                                   const LogicVRegister& src) {
4066   dst.ClearForWrite(vform);
4067   uint64_t operand;
4068   uint32_t result;
4069   double dp_operand, dp_result;
4070   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4071     operand = src.Uint(vform, i);
4072     if (operand <= 0x3FFFFFFF) {
4073       result = 0xFFFFFFFF;
4074     } else {
4075       dp_operand = operand * std::pow(2.0, -32);
4076       dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
4077       result = static_cast<uint32_t>(dp_result);
4078     }
4079     dst.SetUint(vform, i, result);
4080   }
4081   return dst;
4082 }
4083 
4084 // Based on reference C function recip_estimate from ARM ARM.
recip_estimate(double a)4085 double Simulator::recip_estimate(double a) {
4086   int q, s;
4087   double r;
4088   q = static_cast<int>(a * 512.0);
4089   r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
4090   s = static_cast<int>(256.0 * r + 0.5);
4091   return static_cast<double>(s) / 256.0;
4092 }
4093 
urecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4094 LogicVRegister Simulator::urecpe(VectorFormat vform, LogicVRegister dst,
4095                                  const LogicVRegister& src) {
4096   dst.ClearForWrite(vform);
4097   uint64_t operand;
4098   uint32_t result;
4099   double dp_operand, dp_result;
4100   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4101     operand = src.Uint(vform, i);
4102     if (operand <= 0x7FFFFFFF) {
4103       result = 0xFFFFFFFF;
4104     } else {
4105       dp_operand = operand * std::pow(2.0, -32);
4106       dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
4107       result = static_cast<uint32_t>(dp_result);
4108     }
4109     dst.SetUint(vform, i, result);
4110   }
4111   return dst;
4112 }
4113 
4114 template <typename T>
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4115 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
4116                                  const LogicVRegister& src) {
4117   dst.ClearForWrite(vform);
4118   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4119     T op = src.Float<T>(i);
4120     T result;
4121     if (std::isnan(op)) {
4122       result = FPProcessNaN(op);
4123     } else {
4124       int exp;
4125       uint32_t sign;
4126       if (sizeof(T) == sizeof(float)) {
4127         sign = float_sign(op);
4128         exp = static_cast<int>(float_exp(op));
4129         exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
4130         result = float_pack(sign, exp, 0);
4131       } else {
4132         sign = double_sign(op);
4133         exp = static_cast<int>(double_exp(op));
4134         exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
4135         result = double_pack(sign, exp, 0);
4136       }
4137     }
4138     dst.SetFloat(i, result);
4139   }
4140   return dst;
4141 }
4142 
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4143 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
4144                                  const LogicVRegister& src) {
4145   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4146     frecpx<float>(vform, dst, src);
4147   } else {
4148     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4149     frecpx<double>(vform, dst, src);
4150   }
4151   return dst;
4152 }
4153 
scvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)4154 LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst,
4155                                 const LogicVRegister& src, int fbits,
4156                                 FPRounding round) {
4157   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4158     if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4159       float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
4160       dst.SetFloat<float>(i, result);
4161     } else {
4162       DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4163       double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
4164       dst.SetFloat<double>(i, result);
4165     }
4166   }
4167   return dst;
4168 }
4169 
ucvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)4170 LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst,
4171                                 const LogicVRegister& src, int fbits,
4172                                 FPRounding round) {
4173   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4174     if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4175       float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
4176       dst.SetFloat<float>(i, result);
4177     } else {
4178       DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4179       double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
4180       dst.SetFloat<double>(i, result);
4181     }
4182   }
4183   return dst;
4184 }
4185 
4186 #endif  // USE_SIMULATOR
4187 
4188 }  // namespace internal
4189 }  // namespace v8
4190 
4191 #endif  // V8_TARGET_ARCH_ARM64
4192