1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
28 
29 #include <cmath>
30 
31 #include "simulator-aarch64.h"
32 
33 namespace vixl {
34 namespace aarch64 {
35 
36 using vixl::internal::SimFloat16;
37 
// Type predicate: IsFloat64<T>() reports whether T is `double`.
// The generic template answers "no" for every type...
template <class T>
bool IsFloat64() {
  return false;
}
// ...and this explicit specialisation answers "yes" for `double` only.
template <>
bool IsFloat64<double>() {
  return true;
}
46 
// Type predicate: IsFloat32<T>() reports whether T is `float`.
// The generic template answers "no" for every type...
template <class T>
bool IsFloat32() {
  return false;
}
// ...and this explicit specialisation answers "yes" for `float` only.
template <>
bool IsFloat32<float>() {
  return true;
}
55 
56 template <typename T>
IsFloat16()57 bool IsFloat16() {
58   return false;
59 }
60 template <>
IsFloat16()61 bool IsFloat16<Float16>() {
62   return true;
63 }
64 template <>
IsFloat16()65 bool IsFloat16<SimFloat16>() {
66   return true;
67 }
68 
69 template <>
FPDefaultNaN()70 double Simulator::FPDefaultNaN<double>() {
71   return kFP64DefaultNaN;
72 }
73 
74 
75 template <>
FPDefaultNaN()76 float Simulator::FPDefaultNaN<float>() {
77   return kFP32DefaultNaN;
78 }
79 
80 
81 template <>
FPDefaultNaN()82 SimFloat16 Simulator::FPDefaultNaN<SimFloat16>() {
83   return SimFloat16(kFP16DefaultNaN);
84 }
85 
86 
FixedToDouble(int64_t src,int fbits,FPRounding round)87 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
88   if (src >= 0) {
89     return UFixedToDouble(src, fbits, round);
90   } else if (src == INT64_MIN) {
91     return -UFixedToDouble(src, fbits, round);
92   } else {
93     return -UFixedToDouble(-src, fbits, round);
94   }
95 }
96 
97 
UFixedToDouble(uint64_t src,int fbits,FPRounding round)98 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
99   // An input of 0 is a special case because the result is effectively
100   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
101   if (src == 0) {
102     return 0.0;
103   }
104 
105   // Calculate the exponent. The highest significant bit will have the value
106   // 2^exponent.
107   const int highest_significant_bit = 63 - CountLeadingZeros(src);
108   const int64_t exponent = highest_significant_bit - fbits;
109 
110   return FPRoundToDouble(0, exponent, src, round);
111 }
112 
113 
FixedToFloat(int64_t src,int fbits,FPRounding round)114 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
115   if (src >= 0) {
116     return UFixedToFloat(src, fbits, round);
117   } else if (src == INT64_MIN) {
118     return -UFixedToFloat(src, fbits, round);
119   } else {
120     return -UFixedToFloat(-src, fbits, round);
121   }
122 }
123 
124 
UFixedToFloat(uint64_t src,int fbits,FPRounding round)125 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
126   // An input of 0 is a special case because the result is effectively
127   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
128   if (src == 0) {
129     return 0.0f;
130   }
131 
132   // Calculate the exponent. The highest significant bit will have the value
133   // 2^exponent.
134   const int highest_significant_bit = 63 - CountLeadingZeros(src);
135   const int32_t exponent = highest_significant_bit - fbits;
136 
137   return FPRoundToFloat(0, exponent, src, round);
138 }
139 
140 
FixedToFloat16(int64_t src,int fbits,FPRounding round)141 SimFloat16 Simulator::FixedToFloat16(int64_t src, int fbits, FPRounding round) {
142   if (src >= 0) {
143     return UFixedToFloat16(src, fbits, round);
144   } else if (src == INT64_MIN) {
145     return -UFixedToFloat16(src, fbits, round);
146   } else {
147     return -UFixedToFloat16(-src, fbits, round);
148   }
149 }
150 
151 
UFixedToFloat16(uint64_t src,int fbits,FPRounding round)152 SimFloat16 Simulator::UFixedToFloat16(uint64_t src,
153                                       int fbits,
154                                       FPRounding round) {
155   // An input of 0 is a special case because the result is effectively
156   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
157   if (src == 0) {
158     return 0.0f;
159   }
160 
161   // Calculate the exponent. The highest significant bit will have the value
162   // 2^exponent.
163   const int highest_significant_bit = 63 - CountLeadingZeros(src);
164   const int16_t exponent = highest_significant_bit - fbits;
165 
166   return FPRoundToFloat16(0, exponent, src, round);
167 }
168 
169 
ld1(VectorFormat vform,LogicVRegister dst,uint64_t addr)170 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
171   dst.ClearForWrite(vform);
172   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
173     LoadLane(dst, vform, i, addr);
174     addr += LaneSizeInBytesFromFormat(vform);
175   }
176 }
177 
178 
ld1(VectorFormat vform,LogicVRegister dst,int index,uint64_t addr)179 void Simulator::ld1(VectorFormat vform,
180                     LogicVRegister dst,
181                     int index,
182                     uint64_t addr) {
183   LoadLane(dst, vform, index, addr);
184 }
185 
186 
ld1r(VectorFormat vform,VectorFormat unpack_vform,LogicVRegister dst,uint64_t addr,bool is_signed)187 void Simulator::ld1r(VectorFormat vform,
188                      VectorFormat unpack_vform,
189                      LogicVRegister dst,
190                      uint64_t addr,
191                      bool is_signed) {
192   unsigned unpack_size = LaneSizeInBytesFromFormat(unpack_vform);
193   dst.ClearForWrite(vform);
194   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
195     if (is_signed) {
196       LoadIntToLane(dst, vform, unpack_size, i, addr);
197     } else {
198       LoadUintToLane(dst, vform, unpack_size, i, addr);
199     }
200   }
201 }
202 
203 
ld1r(VectorFormat vform,LogicVRegister dst,uint64_t addr)204 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
205   ld1r(vform, vform, dst, addr);
206 }
207 
208 
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr1)209 void Simulator::ld2(VectorFormat vform,
210                     LogicVRegister dst1,
211                     LogicVRegister dst2,
212                     uint64_t addr1) {
213   dst1.ClearForWrite(vform);
214   dst2.ClearForWrite(vform);
215   int esize = LaneSizeInBytesFromFormat(vform);
216   uint64_t addr2 = addr1 + esize;
217   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
218     LoadLane(dst1, vform, i, addr1);
219     LoadLane(dst2, vform, i, addr2);
220     addr1 += 2 * esize;
221     addr2 += 2 * esize;
222   }
223 }
224 
225 
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,int index,uint64_t addr1)226 void Simulator::ld2(VectorFormat vform,
227                     LogicVRegister dst1,
228                     LogicVRegister dst2,
229                     int index,
230                     uint64_t addr1) {
231   dst1.ClearForWrite(vform);
232   dst2.ClearForWrite(vform);
233   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
234   LoadLane(dst1, vform, index, addr1);
235   LoadLane(dst2, vform, index, addr2);
236 }
237 
238 
ld2r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr)239 void Simulator::ld2r(VectorFormat vform,
240                      LogicVRegister dst1,
241                      LogicVRegister dst2,
242                      uint64_t addr) {
243   dst1.ClearForWrite(vform);
244   dst2.ClearForWrite(vform);
245   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
246   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
247     LoadLane(dst1, vform, i, addr);
248     LoadLane(dst2, vform, i, addr2);
249   }
250 }
251 
252 
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr1)253 void Simulator::ld3(VectorFormat vform,
254                     LogicVRegister dst1,
255                     LogicVRegister dst2,
256                     LogicVRegister dst3,
257                     uint64_t addr1) {
258   dst1.ClearForWrite(vform);
259   dst2.ClearForWrite(vform);
260   dst3.ClearForWrite(vform);
261   int esize = LaneSizeInBytesFromFormat(vform);
262   uint64_t addr2 = addr1 + esize;
263   uint64_t addr3 = addr2 + esize;
264   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
265     LoadLane(dst1, vform, i, addr1);
266     LoadLane(dst2, vform, i, addr2);
267     LoadLane(dst3, vform, i, addr3);
268     addr1 += 3 * esize;
269     addr2 += 3 * esize;
270     addr3 += 3 * esize;
271   }
272 }
273 
274 
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr1)275 void Simulator::ld3(VectorFormat vform,
276                     LogicVRegister dst1,
277                     LogicVRegister dst2,
278                     LogicVRegister dst3,
279                     int index,
280                     uint64_t addr1) {
281   dst1.ClearForWrite(vform);
282   dst2.ClearForWrite(vform);
283   dst3.ClearForWrite(vform);
284   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
285   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
286   LoadLane(dst1, vform, index, addr1);
287   LoadLane(dst2, vform, index, addr2);
288   LoadLane(dst3, vform, index, addr3);
289 }
290 
291 
ld3r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)292 void Simulator::ld3r(VectorFormat vform,
293                      LogicVRegister dst1,
294                      LogicVRegister dst2,
295                      LogicVRegister dst3,
296                      uint64_t addr) {
297   dst1.ClearForWrite(vform);
298   dst2.ClearForWrite(vform);
299   dst3.ClearForWrite(vform);
300   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
301   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
302   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
303     LoadLane(dst1, vform, i, addr);
304     LoadLane(dst2, vform, i, addr2);
305     LoadLane(dst3, vform, i, addr3);
306   }
307 }
308 
309 
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr1)310 void Simulator::ld4(VectorFormat vform,
311                     LogicVRegister dst1,
312                     LogicVRegister dst2,
313                     LogicVRegister dst3,
314                     LogicVRegister dst4,
315                     uint64_t addr1) {
316   dst1.ClearForWrite(vform);
317   dst2.ClearForWrite(vform);
318   dst3.ClearForWrite(vform);
319   dst4.ClearForWrite(vform);
320   int esize = LaneSizeInBytesFromFormat(vform);
321   uint64_t addr2 = addr1 + esize;
322   uint64_t addr3 = addr2 + esize;
323   uint64_t addr4 = addr3 + esize;
324   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
325     LoadLane(dst1, vform, i, addr1);
326     LoadLane(dst2, vform, i, addr2);
327     LoadLane(dst3, vform, i, addr3);
328     LoadLane(dst4, vform, i, addr4);
329     addr1 += 4 * esize;
330     addr2 += 4 * esize;
331     addr3 += 4 * esize;
332     addr4 += 4 * esize;
333   }
334 }
335 
336 
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr1)337 void Simulator::ld4(VectorFormat vform,
338                     LogicVRegister dst1,
339                     LogicVRegister dst2,
340                     LogicVRegister dst3,
341                     LogicVRegister dst4,
342                     int index,
343                     uint64_t addr1) {
344   dst1.ClearForWrite(vform);
345   dst2.ClearForWrite(vform);
346   dst3.ClearForWrite(vform);
347   dst4.ClearForWrite(vform);
348   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
349   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
350   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
351   LoadLane(dst1, vform, index, addr1);
352   LoadLane(dst2, vform, index, addr2);
353   LoadLane(dst3, vform, index, addr3);
354   LoadLane(dst4, vform, index, addr4);
355 }
356 
357 
ld4r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)358 void Simulator::ld4r(VectorFormat vform,
359                      LogicVRegister dst1,
360                      LogicVRegister dst2,
361                      LogicVRegister dst3,
362                      LogicVRegister dst4,
363                      uint64_t addr) {
364   dst1.ClearForWrite(vform);
365   dst2.ClearForWrite(vform);
366   dst3.ClearForWrite(vform);
367   dst4.ClearForWrite(vform);
368   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
369   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
370   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
371   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
372     LoadLane(dst1, vform, i, addr);
373     LoadLane(dst2, vform, i, addr2);
374     LoadLane(dst3, vform, i, addr3);
375     LoadLane(dst4, vform, i, addr4);
376   }
377 }
378 
379 
st1(VectorFormat vform,LogicVRegister src,uint64_t addr)380 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
381   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
382     StoreLane(src, vform, i, addr);
383     addr += LaneSizeInBytesFromFormat(vform);
384   }
385 }
386 
387 
st1(VectorFormat vform,LogicVRegister src,int index,uint64_t addr)388 void Simulator::st1(VectorFormat vform,
389                     LogicVRegister src,
390                     int index,
391                     uint64_t addr) {
392   StoreLane(src, vform, index, addr);
393 }
394 
395 
st2(VectorFormat vform,LogicVRegister src,LogicVRegister src2,uint64_t addr)396 void Simulator::st2(VectorFormat vform,
397                     LogicVRegister src,
398                     LogicVRegister src2,
399                     uint64_t addr) {
400   int esize = LaneSizeInBytesFromFormat(vform);
401   uint64_t addr2 = addr + esize;
402   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
403     StoreLane(src, vform, i, addr);
404     StoreLane(src2, vform, i, addr2);
405     addr += 2 * esize;
406     addr2 += 2 * esize;
407   }
408 }
409 
410 
st2(VectorFormat vform,LogicVRegister src,LogicVRegister src2,int index,uint64_t addr)411 void Simulator::st2(VectorFormat vform,
412                     LogicVRegister src,
413                     LogicVRegister src2,
414                     int index,
415                     uint64_t addr) {
416   int esize = LaneSizeInBytesFromFormat(vform);
417   StoreLane(src, vform, index, addr);
418   StoreLane(src2, vform, index, addr + 1 * esize);
419 }
420 
421 
st3(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,uint64_t addr)422 void Simulator::st3(VectorFormat vform,
423                     LogicVRegister src,
424                     LogicVRegister src2,
425                     LogicVRegister src3,
426                     uint64_t addr) {
427   int esize = LaneSizeInBytesFromFormat(vform);
428   uint64_t addr2 = addr + esize;
429   uint64_t addr3 = addr2 + esize;
430   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
431     StoreLane(src, vform, i, addr);
432     StoreLane(src2, vform, i, addr2);
433     StoreLane(src3, vform, i, addr3);
434     addr += 3 * esize;
435     addr2 += 3 * esize;
436     addr3 += 3 * esize;
437   }
438 }
439 
440 
st3(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,int index,uint64_t addr)441 void Simulator::st3(VectorFormat vform,
442                     LogicVRegister src,
443                     LogicVRegister src2,
444                     LogicVRegister src3,
445                     int index,
446                     uint64_t addr) {
447   int esize = LaneSizeInBytesFromFormat(vform);
448   StoreLane(src, vform, index, addr);
449   StoreLane(src2, vform, index, addr + 1 * esize);
450   StoreLane(src3, vform, index, addr + 2 * esize);
451 }
452 
453 
st4(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,LogicVRegister src4,uint64_t addr)454 void Simulator::st4(VectorFormat vform,
455                     LogicVRegister src,
456                     LogicVRegister src2,
457                     LogicVRegister src3,
458                     LogicVRegister src4,
459                     uint64_t addr) {
460   int esize = LaneSizeInBytesFromFormat(vform);
461   uint64_t addr2 = addr + esize;
462   uint64_t addr3 = addr2 + esize;
463   uint64_t addr4 = addr3 + esize;
464   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
465     StoreLane(src, vform, i, addr);
466     StoreLane(src2, vform, i, addr2);
467     StoreLane(src3, vform, i, addr3);
468     StoreLane(src4, vform, i, addr4);
469     addr += 4 * esize;
470     addr2 += 4 * esize;
471     addr3 += 4 * esize;
472     addr4 += 4 * esize;
473   }
474 }
475 
476 
st4(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,LogicVRegister src4,int index,uint64_t addr)477 void Simulator::st4(VectorFormat vform,
478                     LogicVRegister src,
479                     LogicVRegister src2,
480                     LogicVRegister src3,
481                     LogicVRegister src4,
482                     int index,
483                     uint64_t addr) {
484   int esize = LaneSizeInBytesFromFormat(vform);
485   StoreLane(src, vform, index, addr);
486   StoreLane(src2, vform, index, addr + 1 * esize);
487   StoreLane(src3, vform, index, addr + 2 * esize);
488   StoreLane(src4, vform, index, addr + 3 * esize);
489 }
490 
491 
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)492 LogicVRegister Simulator::cmp(VectorFormat vform,
493                               LogicVRegister dst,
494                               const LogicVRegister& src1,
495                               const LogicVRegister& src2,
496                               Condition cond) {
497   dst.ClearForWrite(vform);
498   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
499     int64_t sa = src1.Int(vform, i);
500     int64_t sb = src2.Int(vform, i);
501     uint64_t ua = src1.Uint(vform, i);
502     uint64_t ub = src2.Uint(vform, i);
503     bool result = false;
504     switch (cond) {
505       case eq:
506         result = (ua == ub);
507         break;
508       case ge:
509         result = (sa >= sb);
510         break;
511       case gt:
512         result = (sa > sb);
513         break;
514       case hi:
515         result = (ua > ub);
516         break;
517       case hs:
518         result = (ua >= ub);
519         break;
520       case lt:
521         result = (sa < sb);
522         break;
523       case le:
524         result = (sa <= sb);
525         break;
526       default:
527         VIXL_UNREACHABLE();
528         break;
529     }
530     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
531   }
532   return dst;
533 }
534 
535 
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int imm,Condition cond)536 LogicVRegister Simulator::cmp(VectorFormat vform,
537                               LogicVRegister dst,
538                               const LogicVRegister& src1,
539                               int imm,
540                               Condition cond) {
541   SimVRegister temp;
542   LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
543   return cmp(vform, dst, src1, imm_reg, cond);
544 }
545 
546 
cmptst(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)547 LogicVRegister Simulator::cmptst(VectorFormat vform,
548                                  LogicVRegister dst,
549                                  const LogicVRegister& src1,
550                                  const LogicVRegister& src2) {
551   dst.ClearForWrite(vform);
552   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
553     uint64_t ua = src1.Uint(vform, i);
554     uint64_t ub = src2.Uint(vform, i);
555     dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
556   }
557   return dst;
558 }
559 
560 
add(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)561 LogicVRegister Simulator::add(VectorFormat vform,
562                               LogicVRegister dst,
563                               const LogicVRegister& src1,
564                               const LogicVRegister& src2) {
565   int lane_size = LaneSizeInBitsFromFormat(vform);
566   dst.ClearForWrite(vform);
567 
568   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
569     // Test for unsigned saturation.
570     uint64_t ua = src1.UintLeftJustified(vform, i);
571     uint64_t ub = src2.UintLeftJustified(vform, i);
572     uint64_t ur = ua + ub;
573     if (ur < ua) {
574       dst.SetUnsignedSat(i, true);
575     }
576 
577     // Test for signed saturation.
578     bool pos_a = (ua >> 63) == 0;
579     bool pos_b = (ub >> 63) == 0;
580     bool pos_r = (ur >> 63) == 0;
581     // If the signs of the operands are the same, but different from the result,
582     // there was an overflow.
583     if ((pos_a == pos_b) && (pos_a != pos_r)) {
584       dst.SetSignedSat(i, pos_a);
585     }
586     dst.SetInt(vform, i, ur >> (64 - lane_size));
587   }
588   return dst;
589 }
590 
add_uint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,uint64_t value)591 LogicVRegister Simulator::add_uint(VectorFormat vform,
592                                    LogicVRegister dst,
593                                    const LogicVRegister& src1,
594                                    uint64_t value) {
595   int lane_size = LaneSizeInBitsFromFormat(vform);
596   VIXL_ASSERT(IsUintN(lane_size, value));
597   dst.ClearForWrite(vform);
598   // Left-justify `value`.
599   uint64_t ub = value << (64 - lane_size);
600   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
601     // Test for unsigned saturation.
602     uint64_t ua = src1.UintLeftJustified(vform, i);
603     uint64_t ur = ua + ub;
604     if (ur < ua) {
605       dst.SetUnsignedSat(i, true);
606     }
607 
608     // Test for signed saturation.
609     // `value` is always positive, so we have an overflow if the (signed) result
610     // is smaller than the first operand.
611     if (RawbitsToInt64(ur) < RawbitsToInt64(ua)) {
612       dst.SetSignedSat(i, true);
613     }
614 
615     dst.SetInt(vform, i, ur >> (64 - lane_size));
616   }
617   return dst;
618 }
619 
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)620 LogicVRegister Simulator::addp(VectorFormat vform,
621                                LogicVRegister dst,
622                                const LogicVRegister& src1,
623                                const LogicVRegister& src2) {
624   SimVRegister temp1, temp2;
625   uzp1(vform, temp1, src1, src2);
626   uzp2(vform, temp2, src1, src2);
627   add(vform, dst, temp1, temp2);
628   return dst;
629 }
630 
sdiv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)631 LogicVRegister Simulator::sdiv(VectorFormat vform,
632                                LogicVRegister dst,
633                                const LogicVRegister& src1,
634                                const LogicVRegister& src2) {
635   VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
636 
637   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
638     int64_t val1 = src1.Int(vform, i);
639     int64_t val2 = src2.Int(vform, i);
640     int64_t min_int = (vform == kFormatVnD) ? kXMinInt : kWMinInt;
641     int64_t quotient = 0;
642     if ((val1 == min_int) && (val2 == -1)) {
643       quotient = min_int;
644     } else if (val2 != 0) {
645       quotient = val1 / val2;
646     }
647     dst.SetInt(vform, i, quotient);
648   }
649 
650   return dst;
651 }
652 
udiv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)653 LogicVRegister Simulator::udiv(VectorFormat vform,
654                                LogicVRegister dst,
655                                const LogicVRegister& src1,
656                                const LogicVRegister& src2) {
657   VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
658 
659   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
660     uint64_t val1 = src1.Uint(vform, i);
661     uint64_t val2 = src2.Uint(vform, i);
662     uint64_t quotient = 0;
663     if (val2 != 0) {
664       quotient = val1 / val2;
665     }
666     dst.SetUint(vform, i, quotient);
667   }
668 
669   return dst;
670 }
671 
672 
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)673 LogicVRegister Simulator::mla(VectorFormat vform,
674                               LogicVRegister dst,
675                               const LogicVRegister& srca,
676                               const LogicVRegister& src1,
677                               const LogicVRegister& src2) {
678   SimVRegister temp;
679   mul(vform, temp, src1, src2);
680   add(vform, dst, srca, temp);
681   return dst;
682 }
683 
684 
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)685 LogicVRegister Simulator::mls(VectorFormat vform,
686                               LogicVRegister dst,
687                               const LogicVRegister& srca,
688                               const LogicVRegister& src1,
689                               const LogicVRegister& src2) {
690   SimVRegister temp;
691   mul(vform, temp, src1, src2);
692   sub(vform, dst, srca, temp);
693   return dst;
694 }
695 
696 
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)697 LogicVRegister Simulator::mul(VectorFormat vform,
698                               LogicVRegister dst,
699                               const LogicVRegister& src1,
700                               const LogicVRegister& src2) {
701   dst.ClearForWrite(vform);
702 
703   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
704     dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
705   }
706   return dst;
707 }
708 
709 
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)710 LogicVRegister Simulator::mul(VectorFormat vform,
711                               LogicVRegister dst,
712                               const LogicVRegister& src1,
713                               const LogicVRegister& src2,
714                               int index) {
715   SimVRegister temp;
716   VectorFormat indexform = VectorFormatFillQ(vform);
717   return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
718 }
719 
720 
smulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)721 LogicVRegister Simulator::smulh(VectorFormat vform,
722                                 LogicVRegister dst,
723                                 const LogicVRegister& src1,
724                                 const LogicVRegister& src2) {
725   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
726     int64_t dst_val;
727     int64_t val1 = src1.Int(vform, i);
728     int64_t val2 = src2.Int(vform, i);
729     switch (LaneSizeInBitsFromFormat(vform)) {
730       case 8:
731         dst_val = internal::MultiplyHigh<8>(val1, val2);
732         break;
733       case 16:
734         dst_val = internal::MultiplyHigh<16>(val1, val2);
735         break;
736       case 32:
737         dst_val = internal::MultiplyHigh<32>(val1, val2);
738         break;
739       case 64:
740         dst_val = internal::MultiplyHigh<64>(val1, val2);
741         break;
742       default:
743         dst_val = 0xbadbeef;
744         VIXL_UNREACHABLE();
745         break;
746     }
747     dst.SetInt(vform, i, dst_val);
748   }
749   return dst;
750 }
751 
752 
umulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)753 LogicVRegister Simulator::umulh(VectorFormat vform,
754                                 LogicVRegister dst,
755                                 const LogicVRegister& src1,
756                                 const LogicVRegister& src2) {
757   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
758     uint64_t dst_val;
759     uint64_t val1 = src1.Uint(vform, i);
760     uint64_t val2 = src2.Uint(vform, i);
761     switch (LaneSizeInBitsFromFormat(vform)) {
762       case 8:
763         dst_val = internal::MultiplyHigh<8>(val1, val2);
764         break;
765       case 16:
766         dst_val = internal::MultiplyHigh<16>(val1, val2);
767         break;
768       case 32:
769         dst_val = internal::MultiplyHigh<32>(val1, val2);
770         break;
771       case 64:
772         dst_val = internal::MultiplyHigh<64>(val1, val2);
773         break;
774       default:
775         dst_val = 0xbadbeef;
776         VIXL_UNREACHABLE();
777         break;
778     }
779     dst.SetUint(vform, i, dst_val);
780   }
781   return dst;
782 }
783 
784 
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)785 LogicVRegister Simulator::mla(VectorFormat vform,
786                               LogicVRegister dst,
787                               const LogicVRegister& src1,
788                               const LogicVRegister& src2,
789                               int index) {
790   SimVRegister temp;
791   VectorFormat indexform = VectorFormatFillQ(vform);
792   return mla(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
793 }
794 
795 
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)796 LogicVRegister Simulator::mls(VectorFormat vform,
797                               LogicVRegister dst,
798                               const LogicVRegister& src1,
799                               const LogicVRegister& src2,
800                               int index) {
801   SimVRegister temp;
802   VectorFormat indexform = VectorFormatFillQ(vform);
803   return mls(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
804 }
805 
806 
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)807 LogicVRegister Simulator::smull(VectorFormat vform,
808                                 LogicVRegister dst,
809                                 const LogicVRegister& src1,
810                                 const LogicVRegister& src2,
811                                 int index) {
812   SimVRegister temp;
813   VectorFormat indexform =
814       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
815   return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
816 }
817 
818 
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)819 LogicVRegister Simulator::smull2(VectorFormat vform,
820                                  LogicVRegister dst,
821                                  const LogicVRegister& src1,
822                                  const LogicVRegister& src2,
823                                  int index) {
824   SimVRegister temp;
825   VectorFormat indexform =
826       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
827   return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
828 }
829 
830 
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)831 LogicVRegister Simulator::umull(VectorFormat vform,
832                                 LogicVRegister dst,
833                                 const LogicVRegister& src1,
834                                 const LogicVRegister& src2,
835                                 int index) {
836   SimVRegister temp;
837   VectorFormat indexform =
838       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
839   return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
840 }
841 
842 
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)843 LogicVRegister Simulator::umull2(VectorFormat vform,
844                                  LogicVRegister dst,
845                                  const LogicVRegister& src1,
846                                  const LogicVRegister& src2,
847                                  int index) {
848   SimVRegister temp;
849   VectorFormat indexform =
850       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
851   return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
852 }
853 
854 
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)855 LogicVRegister Simulator::smlal(VectorFormat vform,
856                                 LogicVRegister dst,
857                                 const LogicVRegister& src1,
858                                 const LogicVRegister& src2,
859                                 int index) {
860   SimVRegister temp;
861   VectorFormat indexform =
862       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
863   return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
864 }
865 
866 
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)867 LogicVRegister Simulator::smlal2(VectorFormat vform,
868                                  LogicVRegister dst,
869                                  const LogicVRegister& src1,
870                                  const LogicVRegister& src2,
871                                  int index) {
872   SimVRegister temp;
873   VectorFormat indexform =
874       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
875   return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
876 }
877 
878 
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)879 LogicVRegister Simulator::umlal(VectorFormat vform,
880                                 LogicVRegister dst,
881                                 const LogicVRegister& src1,
882                                 const LogicVRegister& src2,
883                                 int index) {
884   SimVRegister temp;
885   VectorFormat indexform =
886       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
887   return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
888 }
889 
890 
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)891 LogicVRegister Simulator::umlal2(VectorFormat vform,
892                                  LogicVRegister dst,
893                                  const LogicVRegister& src1,
894                                  const LogicVRegister& src2,
895                                  int index) {
896   SimVRegister temp;
897   VectorFormat indexform =
898       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
899   return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
900 }
901 
902 
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)903 LogicVRegister Simulator::smlsl(VectorFormat vform,
904                                 LogicVRegister dst,
905                                 const LogicVRegister& src1,
906                                 const LogicVRegister& src2,
907                                 int index) {
908   SimVRegister temp;
909   VectorFormat indexform =
910       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
911   return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
912 }
913 
914 
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)915 LogicVRegister Simulator::smlsl2(VectorFormat vform,
916                                  LogicVRegister dst,
917                                  const LogicVRegister& src1,
918                                  const LogicVRegister& src2,
919                                  int index) {
920   SimVRegister temp;
921   VectorFormat indexform =
922       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
923   return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
924 }
925 
926 
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)927 LogicVRegister Simulator::umlsl(VectorFormat vform,
928                                 LogicVRegister dst,
929                                 const LogicVRegister& src1,
930                                 const LogicVRegister& src2,
931                                 int index) {
932   SimVRegister temp;
933   VectorFormat indexform =
934       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
935   return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
936 }
937 
938 
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)939 LogicVRegister Simulator::umlsl2(VectorFormat vform,
940                                  LogicVRegister dst,
941                                  const LogicVRegister& src1,
942                                  const LogicVRegister& src2,
943                                  int index) {
944   SimVRegister temp;
945   VectorFormat indexform =
946       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
947   return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
948 }
949 
950 
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)951 LogicVRegister Simulator::sqdmull(VectorFormat vform,
952                                   LogicVRegister dst,
953                                   const LogicVRegister& src1,
954                                   const LogicVRegister& src2,
955                                   int index) {
956   SimVRegister temp;
957   VectorFormat indexform =
958       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
959   return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
960 }
961 
962 
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)963 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
964                                    LogicVRegister dst,
965                                    const LogicVRegister& src1,
966                                    const LogicVRegister& src2,
967                                    int index) {
968   SimVRegister temp;
969   VectorFormat indexform =
970       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
971   return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
972 }
973 
974 
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)975 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
976                                   LogicVRegister dst,
977                                   const LogicVRegister& src1,
978                                   const LogicVRegister& src2,
979                                   int index) {
980   SimVRegister temp;
981   VectorFormat indexform =
982       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
983   return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
984 }
985 
986 
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)987 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
988                                    LogicVRegister dst,
989                                    const LogicVRegister& src1,
990                                    const LogicVRegister& src2,
991                                    int index) {
992   SimVRegister temp;
993   VectorFormat indexform =
994       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
995   return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
996 }
997 
998 
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)999 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
1000                                   LogicVRegister dst,
1001                                   const LogicVRegister& src1,
1002                                   const LogicVRegister& src2,
1003                                   int index) {
1004   SimVRegister temp;
1005   VectorFormat indexform =
1006       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1007   return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
1008 }
1009 
1010 
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1011 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
1012                                    LogicVRegister dst,
1013                                    const LogicVRegister& src1,
1014                                    const LogicVRegister& src2,
1015                                    int index) {
1016   SimVRegister temp;
1017   VectorFormat indexform =
1018       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1019   return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1020 }
1021 
1022 
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1023 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
1024                                   LogicVRegister dst,
1025                                   const LogicVRegister& src1,
1026                                   const LogicVRegister& src2,
1027                                   int index) {
1028   SimVRegister temp;
1029   VectorFormat indexform = VectorFormatFillQ(vform);
1030   return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1031 }
1032 
1033 
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1034 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
1035                                    LogicVRegister dst,
1036                                    const LogicVRegister& src1,
1037                                    const LogicVRegister& src2,
1038                                    int index) {
1039   SimVRegister temp;
1040   VectorFormat indexform = VectorFormatFillQ(vform);
1041   return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1042 }
1043 
1044 
sdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1045 LogicVRegister Simulator::sdot(VectorFormat vform,
1046                                LogicVRegister dst,
1047                                const LogicVRegister& src1,
1048                                const LogicVRegister& src2,
1049                                int index) {
1050   SimVRegister temp;
1051   // NEON indexed `dot` allows the index value exceed the register size.
1052   // Promote the format to Q-sized vector format before the duplication.
1053   dup_elements_to_segments(IsSVEFormat(vform) ? vform
1054                                               : VectorFormatFillQ(vform),
1055                            temp,
1056                            src2,
1057                            index);
1058   return sdot(vform, dst, src1, temp);
1059 }
1060 
1061 
sqrdmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1062 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
1063                                    LogicVRegister dst,
1064                                    const LogicVRegister& src1,
1065                                    const LogicVRegister& src2,
1066                                    int index) {
1067   SimVRegister temp;
1068   VectorFormat indexform = VectorFormatFillQ(vform);
1069   return sqrdmlah(vform, dst, src1, dup_element(indexform, temp, src2, index));
1070 }
1071 
1072 
udot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1073 LogicVRegister Simulator::udot(VectorFormat vform,
1074                                LogicVRegister dst,
1075                                const LogicVRegister& src1,
1076                                const LogicVRegister& src2,
1077                                int index) {
1078   SimVRegister temp;
1079   // NEON indexed `dot` allows the index value exceed the register size.
1080   // Promote the format to Q-sized vector format before the duplication.
1081   dup_elements_to_segments(IsSVEFormat(vform) ? vform
1082                                               : VectorFormatFillQ(vform),
1083                            temp,
1084                            src2,
1085                            index);
1086   return udot(vform, dst, src1, temp);
1087 }
1088 
1089 
sqrdmlsh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1090 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
1091                                    LogicVRegister dst,
1092                                    const LogicVRegister& src1,
1093                                    const LogicVRegister& src2,
1094                                    int index) {
1095   SimVRegister temp;
1096   VectorFormat indexform = VectorFormatFillQ(vform);
1097   return sqrdmlsh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1098 }
1099 
1100 
PolynomialMult(uint8_t op1,uint8_t op2) const1101 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) const {
1102   uint16_t result = 0;
1103   uint16_t extended_op2 = op2;
1104   for (int i = 0; i < 8; ++i) {
1105     if ((op1 >> i) & 1) {
1106       result = result ^ (extended_op2 << i);
1107     }
1108   }
1109   return result;
1110 }
1111 
1112 
pmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1113 LogicVRegister Simulator::pmul(VectorFormat vform,
1114                                LogicVRegister dst,
1115                                const LogicVRegister& src1,
1116                                const LogicVRegister& src2) {
1117   dst.ClearForWrite(vform);
1118   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1119     dst.SetUint(vform,
1120                 i,
1121                 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
1122   }
1123   return dst;
1124 }
1125 
1126 
pmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1127 LogicVRegister Simulator::pmull(VectorFormat vform,
1128                                 LogicVRegister dst,
1129                                 const LogicVRegister& src1,
1130                                 const LogicVRegister& src2) {
1131   VectorFormat vform_src = VectorFormatHalfWidth(vform);
1132   dst.ClearForWrite(vform);
1133   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1134     dst.SetUint(vform,
1135                 i,
1136                 PolynomialMult(src1.Uint(vform_src, i),
1137                                src2.Uint(vform_src, i)));
1138   }
1139   return dst;
1140 }
1141 
1142 
pmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1143 LogicVRegister Simulator::pmull2(VectorFormat vform,
1144                                  LogicVRegister dst,
1145                                  const LogicVRegister& src1,
1146                                  const LogicVRegister& src2) {
1147   VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
1148   dst.ClearForWrite(vform);
1149   int lane_count = LaneCountFromFormat(vform);
1150   for (int i = 0; i < lane_count; i++) {
1151     dst.SetUint(vform,
1152                 i,
1153                 PolynomialMult(src1.Uint(vform_src, lane_count + i),
1154                                src2.Uint(vform_src, lane_count + i)));
1155   }
1156   return dst;
1157 }
1158 
1159 
sub(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1160 LogicVRegister Simulator::sub(VectorFormat vform,
1161                               LogicVRegister dst,
1162                               const LogicVRegister& src1,
1163                               const LogicVRegister& src2) {
1164   int lane_size = LaneSizeInBitsFromFormat(vform);
1165   dst.ClearForWrite(vform);
1166   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1167     // Test for unsigned saturation.
1168     uint64_t ua = src1.UintLeftJustified(vform, i);
1169     uint64_t ub = src2.UintLeftJustified(vform, i);
1170     uint64_t ur = ua - ub;
1171     if (ub > ua) {
1172       dst.SetUnsignedSat(i, false);
1173     }
1174 
1175     // Test for signed saturation.
1176     bool pos_a = (ua >> 63) == 0;
1177     bool pos_b = (ub >> 63) == 0;
1178     bool pos_r = (ur >> 63) == 0;
1179     // If the signs of the operands are different, and the sign of the first
1180     // operand doesn't match the result, there was an overflow.
1181     if ((pos_a != pos_b) && (pos_a != pos_r)) {
1182       dst.SetSignedSat(i, pos_a);
1183     }
1184 
1185     dst.SetInt(vform, i, ur >> (64 - lane_size));
1186   }
1187   return dst;
1188 }
1189 
sub_uint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,uint64_t value)1190 LogicVRegister Simulator::sub_uint(VectorFormat vform,
1191                                    LogicVRegister dst,
1192                                    const LogicVRegister& src1,
1193                                    uint64_t value) {
1194   int lane_size = LaneSizeInBitsFromFormat(vform);
1195   VIXL_ASSERT(IsUintN(lane_size, value));
1196   dst.ClearForWrite(vform);
1197   // Left-justify `value`.
1198   uint64_t ub = value << (64 - lane_size);
1199   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1200     // Test for unsigned saturation.
1201     uint64_t ua = src1.UintLeftJustified(vform, i);
1202     uint64_t ur = ua - ub;
1203     if (ub > ua) {
1204       dst.SetUnsignedSat(i, false);
1205     }
1206 
1207     // Test for signed saturation.
1208     // `value` is always positive, so we have an overflow if the (signed) result
1209     // is greater than the first operand.
1210     if (RawbitsToInt64(ur) > RawbitsToInt64(ua)) {
1211       dst.SetSignedSat(i, false);
1212     }
1213 
1214     dst.SetInt(vform, i, ur >> (64 - lane_size));
1215   }
1216   return dst;
1217 }
1218 
and_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1219 LogicVRegister Simulator::and_(VectorFormat vform,
1220                                LogicVRegister dst,
1221                                const LogicVRegister& src1,
1222                                const LogicVRegister& src2) {
1223   dst.ClearForWrite(vform);
1224   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1225     dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1226   }
1227   return dst;
1228 }
1229 
1230 
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1231 LogicVRegister Simulator::orr(VectorFormat vform,
1232                               LogicVRegister dst,
1233                               const LogicVRegister& src1,
1234                               const LogicVRegister& src2) {
1235   dst.ClearForWrite(vform);
1236   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1237     dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1238   }
1239   return dst;
1240 }
1241 
1242 
orn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1243 LogicVRegister Simulator::orn(VectorFormat vform,
1244                               LogicVRegister dst,
1245                               const LogicVRegister& src1,
1246                               const LogicVRegister& src2) {
1247   dst.ClearForWrite(vform);
1248   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1249     dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1250   }
1251   return dst;
1252 }
1253 
1254 
eor(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1255 LogicVRegister Simulator::eor(VectorFormat vform,
1256                               LogicVRegister dst,
1257                               const LogicVRegister& src1,
1258                               const LogicVRegister& src2) {
1259   dst.ClearForWrite(vform);
1260   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1261     dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1262   }
1263   return dst;
1264 }
1265 
1266 
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1267 LogicVRegister Simulator::bic(VectorFormat vform,
1268                               LogicVRegister dst,
1269                               const LogicVRegister& src1,
1270                               const LogicVRegister& src2) {
1271   dst.ClearForWrite(vform);
1272   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1273     dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1274   }
1275   return dst;
1276 }
1277 
1278 
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)1279 LogicVRegister Simulator::bic(VectorFormat vform,
1280                               LogicVRegister dst,
1281                               const LogicVRegister& src,
1282                               uint64_t imm) {
1283   uint64_t result[16];
1284   int lane_count = LaneCountFromFormat(vform);
1285   for (int i = 0; i < lane_count; ++i) {
1286     result[i] = src.Uint(vform, i) & ~imm;
1287   }
1288   dst.ClearForWrite(vform);
1289   for (int i = 0; i < lane_count; ++i) {
1290     dst.SetUint(vform, i, result[i]);
1291   }
1292   return dst;
1293 }
1294 
1295 
bif(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1296 LogicVRegister Simulator::bif(VectorFormat vform,
1297                               LogicVRegister dst,
1298                               const LogicVRegister& src1,
1299                               const LogicVRegister& src2) {
1300   dst.ClearForWrite(vform);
1301   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1302     uint64_t operand1 = dst.Uint(vform, i);
1303     uint64_t operand2 = ~src2.Uint(vform, i);
1304     uint64_t operand3 = src1.Uint(vform, i);
1305     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1306     dst.SetUint(vform, i, result);
1307   }
1308   return dst;
1309 }
1310 
1311 
bit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1312 LogicVRegister Simulator::bit(VectorFormat vform,
1313                               LogicVRegister dst,
1314                               const LogicVRegister& src1,
1315                               const LogicVRegister& src2) {
1316   dst.ClearForWrite(vform);
1317   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1318     uint64_t operand1 = dst.Uint(vform, i);
1319     uint64_t operand2 = src2.Uint(vform, i);
1320     uint64_t operand3 = src1.Uint(vform, i);
1321     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1322     dst.SetUint(vform, i, result);
1323   }
1324   return dst;
1325 }
1326 
1327 
bsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1328 LogicVRegister Simulator::bsl(VectorFormat vform,
1329                               LogicVRegister dst,
1330                               const LogicVRegister& src1,
1331                               const LogicVRegister& src2) {
1332   dst.ClearForWrite(vform);
1333   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1334     uint64_t operand1 = src2.Uint(vform, i);
1335     uint64_t operand2 = dst.Uint(vform, i);
1336     uint64_t operand3 = src1.Uint(vform, i);
1337     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1338     dst.SetUint(vform, i, result);
1339   }
1340   return dst;
1341 }
1342 
1343 
sminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1344 LogicVRegister Simulator::sminmax(VectorFormat vform,
1345                                   LogicVRegister dst,
1346                                   const LogicVRegister& src1,
1347                                   const LogicVRegister& src2,
1348                                   bool max) {
1349   dst.ClearForWrite(vform);
1350   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1351     int64_t src1_val = src1.Int(vform, i);
1352     int64_t src2_val = src2.Int(vform, i);
1353     int64_t dst_val;
1354     if (max) {
1355       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1356     } else {
1357       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1358     }
1359     dst.SetInt(vform, i, dst_val);
1360   }
1361   return dst;
1362 }
1363 
1364 
smax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1365 LogicVRegister Simulator::smax(VectorFormat vform,
1366                                LogicVRegister dst,
1367                                const LogicVRegister& src1,
1368                                const LogicVRegister& src2) {
1369   return sminmax(vform, dst, src1, src2, true);
1370 }
1371 
1372 
smin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1373 LogicVRegister Simulator::smin(VectorFormat vform,
1374                                LogicVRegister dst,
1375                                const LogicVRegister& src1,
1376                                const LogicVRegister& src2) {
1377   return sminmax(vform, dst, src1, src2, false);
1378 }
1379 
1380 
sminmaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1381 LogicVRegister Simulator::sminmaxp(VectorFormat vform,
1382                                    LogicVRegister dst,
1383                                    const LogicVRegister& src1,
1384                                    const LogicVRegister& src2,
1385                                    bool max) {
1386   int lanes = LaneCountFromFormat(vform);
1387   int64_t result[kMaxLanesPerVector];
1388   const LogicVRegister* src = &src1;
1389   for (int j = 0; j < 2; j++) {
1390     for (int i = 0; i < lanes; i += 2) {
1391       int64_t first_val = src->Int(vform, i);
1392       int64_t second_val = src->Int(vform, i + 1);
1393       int64_t dst_val;
1394       if (max) {
1395         dst_val = (first_val > second_val) ? first_val : second_val;
1396       } else {
1397         dst_val = (first_val < second_val) ? first_val : second_val;
1398       }
1399       VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector);
1400       result[(i >> 1) + (j * lanes / 2)] = dst_val;
1401     }
1402     src = &src2;
1403   }
1404   dst.SetIntArray(vform, result);
1405   return dst;
1406 }
1407 
1408 
smaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1409 LogicVRegister Simulator::smaxp(VectorFormat vform,
1410                                 LogicVRegister dst,
1411                                 const LogicVRegister& src1,
1412                                 const LogicVRegister& src2) {
1413   return sminmaxp(vform, dst, src1, src2, true);
1414 }
1415 
1416 
sminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1417 LogicVRegister Simulator::sminp(VectorFormat vform,
1418                                 LogicVRegister dst,
1419                                 const LogicVRegister& src1,
1420                                 const LogicVRegister& src2) {
1421   return sminmaxp(vform, dst, src1, src2, false);
1422 }
1423 
1424 
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1425 LogicVRegister Simulator::addp(VectorFormat vform,
1426                                LogicVRegister dst,
1427                                const LogicVRegister& src) {
1428   VIXL_ASSERT(vform == kFormatD);
1429 
1430   uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1431   dst.ClearForWrite(vform);
1432   dst.SetUint(vform, 0, dst_val);
1433   return dst;
1434 }
1435 
1436 
addv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1437 LogicVRegister Simulator::addv(VectorFormat vform,
1438                                LogicVRegister dst,
1439                                const LogicVRegister& src) {
1440   VectorFormat vform_dst =
1441       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1442 
1443 
1444   int64_t dst_val = 0;
1445   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1446     dst_val += src.Int(vform, i);
1447   }
1448 
1449   dst.ClearForWrite(vform_dst);
1450   dst.SetInt(vform_dst, 0, dst_val);
1451   return dst;
1452 }
1453 
1454 
saddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1455 LogicVRegister Simulator::saddlv(VectorFormat vform,
1456                                  LogicVRegister dst,
1457                                  const LogicVRegister& src) {
1458   VectorFormat vform_dst =
1459       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1460 
1461   int64_t dst_val = 0;
1462   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1463     dst_val += src.Int(vform, i);
1464   }
1465 
1466   dst.ClearForWrite(vform_dst);
1467   dst.SetInt(vform_dst, 0, dst_val);
1468   return dst;
1469 }
1470 
1471 
uaddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1472 LogicVRegister Simulator::uaddlv(VectorFormat vform,
1473                                  LogicVRegister dst,
1474                                  const LogicVRegister& src) {
1475   VectorFormat vform_dst =
1476       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1477 
1478   uint64_t dst_val = 0;
1479   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1480     dst_val += src.Uint(vform, i);
1481   }
1482 
1483   dst.ClearForWrite(vform_dst);
1484   dst.SetUint(vform_dst, 0, dst_val);
1485   return dst;
1486 }
1487 
1488 
sminmaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,bool max)1489 LogicVRegister Simulator::sminmaxv(VectorFormat vform,
1490                                    LogicVRegister dst,
1491                                    const LogicPRegister& pg,
1492                                    const LogicVRegister& src,
1493                                    bool max) {
1494   int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1495   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1496     if (!pg.IsActive(vform, i)) continue;
1497 
1498     int64_t src_val = src.Int(vform, i);
1499     if (max) {
1500       dst_val = (src_val > dst_val) ? src_val : dst_val;
1501     } else {
1502       dst_val = (src_val < dst_val) ? src_val : dst_val;
1503     }
1504   }
1505   dst.ClearForWrite(ScalarFormatFromFormat(vform));
1506   dst.SetInt(vform, 0, dst_val);
1507   return dst;
1508 }
1509 
1510 
smaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1511 LogicVRegister Simulator::smaxv(VectorFormat vform,
1512                                 LogicVRegister dst,
1513                                 const LogicVRegister& src) {
1514   sminmaxv(vform, dst, GetPTrue(), src, true);
1515   return dst;
1516 }
1517 
1518 
sminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1519 LogicVRegister Simulator::sminv(VectorFormat vform,
1520                                 LogicVRegister dst,
1521                                 const LogicVRegister& src) {
1522   sminmaxv(vform, dst, GetPTrue(), src, false);
1523   return dst;
1524 }
1525 
1526 
smaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1527 LogicVRegister Simulator::smaxv(VectorFormat vform,
1528                                 LogicVRegister dst,
1529                                 const LogicPRegister& pg,
1530                                 const LogicVRegister& src) {
1531   VIXL_ASSERT(IsSVEFormat(vform));
1532   sminmaxv(vform, dst, pg, src, true);
1533   return dst;
1534 }
1535 
1536 
sminv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1537 LogicVRegister Simulator::sminv(VectorFormat vform,
1538                                 LogicVRegister dst,
1539                                 const LogicPRegister& pg,
1540                                 const LogicVRegister& src) {
1541   VIXL_ASSERT(IsSVEFormat(vform));
1542   sminmaxv(vform, dst, pg, src, false);
1543   return dst;
1544 }
1545 
1546 
uminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1547 LogicVRegister Simulator::uminmax(VectorFormat vform,
1548                                   LogicVRegister dst,
1549                                   const LogicVRegister& src1,
1550                                   const LogicVRegister& src2,
1551                                   bool max) {
1552   dst.ClearForWrite(vform);
1553   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1554     uint64_t src1_val = src1.Uint(vform, i);
1555     uint64_t src2_val = src2.Uint(vform, i);
1556     uint64_t dst_val;
1557     if (max) {
1558       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1559     } else {
1560       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1561     }
1562     dst.SetUint(vform, i, dst_val);
1563   }
1564   return dst;
1565 }
1566 
1567 
umax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1568 LogicVRegister Simulator::umax(VectorFormat vform,
1569                                LogicVRegister dst,
1570                                const LogicVRegister& src1,
1571                                const LogicVRegister& src2) {
1572   return uminmax(vform, dst, src1, src2, true);
1573 }
1574 
1575 
umin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1576 LogicVRegister Simulator::umin(VectorFormat vform,
1577                                LogicVRegister dst,
1578                                const LogicVRegister& src1,
1579                                const LogicVRegister& src2) {
1580   return uminmax(vform, dst, src1, src2, false);
1581 }
1582 
1583 
uminmaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1584 LogicVRegister Simulator::uminmaxp(VectorFormat vform,
1585                                    LogicVRegister dst,
1586                                    const LogicVRegister& src1,
1587                                    const LogicVRegister& src2,
1588                                    bool max) {
1589   int lanes = LaneCountFromFormat(vform);
1590   uint64_t result[kMaxLanesPerVector];
1591   const LogicVRegister* src = &src1;
1592   for (int j = 0; j < 2; j++) {
1593     for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1594       uint64_t first_val = src->Uint(vform, i);
1595       uint64_t second_val = src->Uint(vform, i + 1);
1596       uint64_t dst_val;
1597       if (max) {
1598         dst_val = (first_val > second_val) ? first_val : second_val;
1599       } else {
1600         dst_val = (first_val < second_val) ? first_val : second_val;
1601       }
1602       VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector);
1603       result[(i >> 1) + (j * lanes / 2)] = dst_val;
1604     }
1605     src = &src2;
1606   }
1607   dst.SetUintArray(vform, result);
1608   return dst;
1609 }
1610 
1611 
umaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1612 LogicVRegister Simulator::umaxp(VectorFormat vform,
1613                                 LogicVRegister dst,
1614                                 const LogicVRegister& src1,
1615                                 const LogicVRegister& src2) {
1616   return uminmaxp(vform, dst, src1, src2, true);
1617 }
1618 
1619 
uminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1620 LogicVRegister Simulator::uminp(VectorFormat vform,
1621                                 LogicVRegister dst,
1622                                 const LogicVRegister& src1,
1623                                 const LogicVRegister& src2) {
1624   return uminmaxp(vform, dst, src1, src2, false);
1625 }
1626 
1627 
uminmaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,bool max)1628 LogicVRegister Simulator::uminmaxv(VectorFormat vform,
1629                                    LogicVRegister dst,
1630                                    const LogicPRegister& pg,
1631                                    const LogicVRegister& src,
1632                                    bool max) {
1633   uint64_t dst_val = max ? 0 : UINT64_MAX;
1634   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1635     if (!pg.IsActive(vform, i)) continue;
1636 
1637     uint64_t src_val = src.Uint(vform, i);
1638     if (max) {
1639       dst_val = (src_val > dst_val) ? src_val : dst_val;
1640     } else {
1641       dst_val = (src_val < dst_val) ? src_val : dst_val;
1642     }
1643   }
1644   dst.ClearForWrite(ScalarFormatFromFormat(vform));
1645   dst.SetUint(vform, 0, dst_val);
1646   return dst;
1647 }
1648 
1649 
umaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1650 LogicVRegister Simulator::umaxv(VectorFormat vform,
1651                                 LogicVRegister dst,
1652                                 const LogicVRegister& src) {
1653   uminmaxv(vform, dst, GetPTrue(), src, true);
1654   return dst;
1655 }
1656 
1657 
uminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1658 LogicVRegister Simulator::uminv(VectorFormat vform,
1659                                 LogicVRegister dst,
1660                                 const LogicVRegister& src) {
1661   uminmaxv(vform, dst, GetPTrue(), src, false);
1662   return dst;
1663 }
1664 
1665 
umaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1666 LogicVRegister Simulator::umaxv(VectorFormat vform,
1667                                 LogicVRegister dst,
1668                                 const LogicPRegister& pg,
1669                                 const LogicVRegister& src) {
1670   VIXL_ASSERT(IsSVEFormat(vform));
1671   uminmaxv(vform, dst, pg, src, true);
1672   return dst;
1673 }
1674 
1675 
uminv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1676 LogicVRegister Simulator::uminv(VectorFormat vform,
1677                                 LogicVRegister dst,
1678                                 const LogicPRegister& pg,
1679                                 const LogicVRegister& src) {
1680   VIXL_ASSERT(IsSVEFormat(vform));
1681   uminmaxv(vform, dst, pg, src, false);
1682   return dst;
1683 }
1684 
1685 
shl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1686 LogicVRegister Simulator::shl(VectorFormat vform,
1687                               LogicVRegister dst,
1688                               const LogicVRegister& src,
1689                               int shift) {
1690   VIXL_ASSERT(shift >= 0);
1691   SimVRegister temp;
1692   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1693   return ushl(vform, dst, src, shiftreg);
1694 }
1695 
1696 
sshll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1697 LogicVRegister Simulator::sshll(VectorFormat vform,
1698                                 LogicVRegister dst,
1699                                 const LogicVRegister& src,
1700                                 int shift) {
1701   VIXL_ASSERT(shift >= 0);
1702   SimVRegister temp1, temp2;
1703   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1704   LogicVRegister extendedreg = sxtl(vform, temp2, src);
1705   return sshl(vform, dst, extendedreg, shiftreg);
1706 }
1707 
1708 
sshll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1709 LogicVRegister Simulator::sshll2(VectorFormat vform,
1710                                  LogicVRegister dst,
1711                                  const LogicVRegister& src,
1712                                  int shift) {
1713   VIXL_ASSERT(shift >= 0);
1714   SimVRegister temp1, temp2;
1715   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1716   LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1717   return sshl(vform, dst, extendedreg, shiftreg);
1718 }
1719 
1720 
shll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1721 LogicVRegister Simulator::shll(VectorFormat vform,
1722                                LogicVRegister dst,
1723                                const LogicVRegister& src) {
1724   int shift = LaneSizeInBitsFromFormat(vform) / 2;
1725   return sshll(vform, dst, src, shift);
1726 }
1727 
1728 
shll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1729 LogicVRegister Simulator::shll2(VectorFormat vform,
1730                                 LogicVRegister dst,
1731                                 const LogicVRegister& src) {
1732   int shift = LaneSizeInBitsFromFormat(vform) / 2;
1733   return sshll2(vform, dst, src, shift);
1734 }
1735 
1736 
ushll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1737 LogicVRegister Simulator::ushll(VectorFormat vform,
1738                                 LogicVRegister dst,
1739                                 const LogicVRegister& src,
1740                                 int shift) {
1741   VIXL_ASSERT(shift >= 0);
1742   SimVRegister temp1, temp2;
1743   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1744   LogicVRegister extendedreg = uxtl(vform, temp2, src);
1745   return ushl(vform, dst, extendedreg, shiftreg);
1746 }
1747 
1748 
ushll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1749 LogicVRegister Simulator::ushll2(VectorFormat vform,
1750                                  LogicVRegister dst,
1751                                  const LogicVRegister& src,
1752                                  int shift) {
1753   VIXL_ASSERT(shift >= 0);
1754   SimVRegister temp1, temp2;
1755   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1756   LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1757   return ushl(vform, dst, extendedreg, shiftreg);
1758 }
1759 
clast(VectorFormat vform,const LogicPRegister & pg,const LogicVRegister & src,int offset_from_last_active)1760 std::pair<bool, uint64_t> Simulator::clast(VectorFormat vform,
1761                                            const LogicPRegister& pg,
1762                                            const LogicVRegister& src,
1763                                            int offset_from_last_active) {
1764   // Untested for any other values.
1765   VIXL_ASSERT((offset_from_last_active == 0) || (offset_from_last_active == 1));
1766 
1767   int last_active = GetLastActive(vform, pg);
1768   int lane_count = LaneCountFromFormat(vform);
1769   int index =
1770       ((last_active + offset_from_last_active) + lane_count) % lane_count;
1771   return std::make_pair(last_active >= 0, src.Uint(vform, index));
1772 }
1773 
compact(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1774 LogicVRegister Simulator::compact(VectorFormat vform,
1775                                   LogicVRegister dst,
1776                                   const LogicPRegister& pg,
1777                                   const LogicVRegister& src) {
1778   int j = 0;
1779   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1780     if (pg.IsActive(vform, i)) {
1781       dst.SetUint(vform, j++, src.Uint(vform, i));
1782     }
1783   }
1784   for (; j < LaneCountFromFormat(vform); j++) {
1785     dst.SetUint(vform, j, 0);
1786   }
1787   return dst;
1788 }
1789 
splice(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2)1790 LogicVRegister Simulator::splice(VectorFormat vform,
1791                                  LogicVRegister dst,
1792                                  const LogicPRegister& pg,
1793                                  const LogicVRegister& src1,
1794                                  const LogicVRegister& src2) {
1795   int lane_count = LaneCountFromFormat(vform);
1796   int first_active = GetFirstActive(vform, pg);
1797   int last_active = GetLastActive(vform, pg);
1798   int dst_idx = 0;
1799   uint64_t result[kZRegMaxSizeInBytes];
1800 
1801   if (first_active >= 0) {
1802     VIXL_ASSERT(last_active >= first_active);
1803     VIXL_ASSERT(last_active < lane_count);
1804     for (int i = first_active; i <= last_active; i++) {
1805       result[dst_idx++] = src1.Uint(vform, i);
1806     }
1807   }
1808 
1809   VIXL_ASSERT(dst_idx <= lane_count);
1810   for (int i = dst_idx; i < lane_count; i++) {
1811     result[i] = src2.Uint(vform, i - dst_idx);
1812   }
1813 
1814   for (int i = 0; i < lane_count; i++) {
1815     dst.SetUint(vform, i, result[i]);
1816   }
1817   return dst;
1818 }
1819 
sel(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2)1820 LogicVRegister Simulator::sel(VectorFormat vform,
1821                               LogicVRegister dst,
1822                               const SimPRegister& pg,
1823                               const LogicVRegister& src1,
1824                               const LogicVRegister& src2) {
1825   int p_reg_bits_per_lane =
1826       LaneSizeInBitsFromFormat(vform) / kZRegBitsPerPRegBit;
1827   for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
1828     uint64_t lane_value = pg.GetBit(lane * p_reg_bits_per_lane)
1829                               ? src1.Uint(vform, lane)
1830                               : src2.Uint(vform, lane);
1831     dst.SetUint(vform, lane, lane_value);
1832   }
1833   return dst;
1834 }
1835 
1836 
sel(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src1,const LogicPRegister & src2)1837 LogicPRegister Simulator::sel(LogicPRegister dst,
1838                               const LogicPRegister& pg,
1839                               const LogicPRegister& src1,
1840                               const LogicPRegister& src2) {
1841   for (int i = 0; i < dst.GetChunkCount(); i++) {
1842     LogicPRegister::ChunkType mask = pg.GetChunk(i);
1843     LogicPRegister::ChunkType result =
1844         (mask & src1.GetChunk(i)) | (~mask & src2.GetChunk(i));
1845     dst.SetChunk(i, result);
1846   }
1847   return dst;
1848 }
1849 
1850 
sli(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1851 LogicVRegister Simulator::sli(VectorFormat vform,
1852                               LogicVRegister dst,
1853                               const LogicVRegister& src,
1854                               int shift) {
1855   dst.ClearForWrite(vform);
1856   int lane_count = LaneCountFromFormat(vform);
1857   for (int i = 0; i < lane_count; i++) {
1858     uint64_t src_lane = src.Uint(vform, i);
1859     uint64_t dst_lane = dst.Uint(vform, i);
1860     uint64_t shifted = src_lane << shift;
1861     uint64_t mask = MaxUintFromFormat(vform) << shift;
1862     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1863   }
1864   return dst;
1865 }
1866 
1867 
sqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1868 LogicVRegister Simulator::sqshl(VectorFormat vform,
1869                                 LogicVRegister dst,
1870                                 const LogicVRegister& src,
1871                                 int shift) {
1872   VIXL_ASSERT(shift >= 0);
1873   SimVRegister temp;
1874   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1875   return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1876 }
1877 
1878 
uqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1879 LogicVRegister Simulator::uqshl(VectorFormat vform,
1880                                 LogicVRegister dst,
1881                                 const LogicVRegister& src,
1882                                 int shift) {
1883   VIXL_ASSERT(shift >= 0);
1884   SimVRegister temp;
1885   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1886   return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1887 }
1888 
1889 
sqshlu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1890 LogicVRegister Simulator::sqshlu(VectorFormat vform,
1891                                  LogicVRegister dst,
1892                                  const LogicVRegister& src,
1893                                  int shift) {
1894   VIXL_ASSERT(shift >= 0);
1895   SimVRegister temp;
1896   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1897   return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1898 }
1899 
1900 
sri(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1901 LogicVRegister Simulator::sri(VectorFormat vform,
1902                               LogicVRegister dst,
1903                               const LogicVRegister& src,
1904                               int shift) {
1905   dst.ClearForWrite(vform);
1906   int lane_count = LaneCountFromFormat(vform);
1907   VIXL_ASSERT((shift > 0) &&
1908               (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1909   for (int i = 0; i < lane_count; i++) {
1910     uint64_t src_lane = src.Uint(vform, i);
1911     uint64_t dst_lane = dst.Uint(vform, i);
1912     uint64_t shifted;
1913     uint64_t mask;
1914     if (shift == 64) {
1915       shifted = 0;
1916       mask = 0;
1917     } else {
1918       shifted = src_lane >> shift;
1919       mask = MaxUintFromFormat(vform) >> shift;
1920     }
1921     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1922   }
1923   return dst;
1924 }
1925 
1926 
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1927 LogicVRegister Simulator::ushr(VectorFormat vform,
1928                                LogicVRegister dst,
1929                                const LogicVRegister& src,
1930                                int shift) {
1931   VIXL_ASSERT(shift >= 0);
1932   SimVRegister temp;
1933   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1934   return ushl(vform, dst, src, shiftreg);
1935 }
1936 
1937 
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1938 LogicVRegister Simulator::sshr(VectorFormat vform,
1939                                LogicVRegister dst,
1940                                const LogicVRegister& src,
1941                                int shift) {
1942   VIXL_ASSERT(shift >= 0);
1943   SimVRegister temp;
1944   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1945   return sshl(vform, dst, src, shiftreg);
1946 }
1947 
1948 
ssra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1949 LogicVRegister Simulator::ssra(VectorFormat vform,
1950                                LogicVRegister dst,
1951                                const LogicVRegister& src,
1952                                int shift) {
1953   SimVRegister temp;
1954   LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1955   return add(vform, dst, dst, shifted_reg);
1956 }
1957 
1958 
usra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1959 LogicVRegister Simulator::usra(VectorFormat vform,
1960                                LogicVRegister dst,
1961                                const LogicVRegister& src,
1962                                int shift) {
1963   SimVRegister temp;
1964   LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1965   return add(vform, dst, dst, shifted_reg);
1966 }
1967 
1968 
srsra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1969 LogicVRegister Simulator::srsra(VectorFormat vform,
1970                                 LogicVRegister dst,
1971                                 const LogicVRegister& src,
1972                                 int shift) {
1973   SimVRegister temp;
1974   LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1975   return add(vform, dst, dst, shifted_reg);
1976 }
1977 
1978 
ursra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1979 LogicVRegister Simulator::ursra(VectorFormat vform,
1980                                 LogicVRegister dst,
1981                                 const LogicVRegister& src,
1982                                 int shift) {
1983   SimVRegister temp;
1984   LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1985   return add(vform, dst, dst, shifted_reg);
1986 }
1987 
1988 
cls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1989 LogicVRegister Simulator::cls(VectorFormat vform,
1990                               LogicVRegister dst,
1991                               const LogicVRegister& src) {
1992   int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1993   int lane_count = LaneCountFromFormat(vform);
1994 
1995   // Ensure that we can store one result per lane.
1996   int result[kZRegMaxSizeInBytes];
1997 
1998   for (int i = 0; i < lane_count; i++) {
1999     result[i] = CountLeadingSignBits(src.Int(vform, i), lane_size_in_bits);
2000   }
2001 
2002   dst.ClearForWrite(vform);
2003   for (int i = 0; i < lane_count; ++i) {
2004     dst.SetUint(vform, i, result[i]);
2005   }
2006   return dst;
2007 }
2008 
2009 
clz(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2010 LogicVRegister Simulator::clz(VectorFormat vform,
2011                               LogicVRegister dst,
2012                               const LogicVRegister& src) {
2013   int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
2014   int lane_count = LaneCountFromFormat(vform);
2015 
2016   // Ensure that we can store one result per lane.
2017   int result[kZRegMaxSizeInBytes];
2018 
2019   for (int i = 0; i < lane_count; i++) {
2020     result[i] = CountLeadingZeros(src.Uint(vform, i), lane_size_in_bits);
2021   }
2022 
2023   dst.ClearForWrite(vform);
2024   for (int i = 0; i < lane_count; ++i) {
2025     dst.SetUint(vform, i, result[i]);
2026   }
2027   return dst;
2028 }
2029 
2030 
cnot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2031 LogicVRegister Simulator::cnot(VectorFormat vform,
2032                                LogicVRegister dst,
2033                                const LogicVRegister& src) {
2034   dst.ClearForWrite(vform);
2035   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2036     uint64_t value = (src.Uint(vform, i) == 0) ? 1 : 0;
2037     dst.SetUint(vform, i, value);
2038   }
2039   return dst;
2040 }
2041 
2042 
cnt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2043 LogicVRegister Simulator::cnt(VectorFormat vform,
2044                               LogicVRegister dst,
2045                               const LogicVRegister& src) {
2046   int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
2047   int lane_count = LaneCountFromFormat(vform);
2048 
2049   // Ensure that we can store one result per lane.
2050   int result[kZRegMaxSizeInBytes];
2051 
2052   for (int i = 0; i < lane_count; i++) {
2053     result[i] = CountSetBits(src.Uint(vform, i), lane_size_in_bits);
2054   }
2055 
2056   dst.ClearForWrite(vform);
2057   for (int i = 0; i < lane_count; ++i) {
2058     dst.SetUint(vform, i, result[i]);
2059   }
2060   return dst;
2061 }
2062 
2063 
sshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2064 LogicVRegister Simulator::sshl(VectorFormat vform,
2065                                LogicVRegister dst,
2066                                const LogicVRegister& src1,
2067                                const LogicVRegister& src2) {
2068   dst.ClearForWrite(vform);
2069   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2070     int8_t shift_val = src2.Int(vform, i);
2071     int64_t lj_src_val = src1.IntLeftJustified(vform, i);
2072 
2073     // Set signed saturation state.
2074     if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) {
2075       dst.SetSignedSat(i, lj_src_val >= 0);
2076     }
2077 
2078     // Set unsigned saturation state.
2079     if (lj_src_val < 0) {
2080       dst.SetUnsignedSat(i, false);
2081     } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
2082                (lj_src_val != 0)) {
2083       dst.SetUnsignedSat(i, true);
2084     }
2085 
2086     int64_t src_val = src1.Int(vform, i);
2087     bool src_is_negative = src_val < 0;
2088     if (shift_val > 63) {
2089       dst.SetInt(vform, i, 0);
2090     } else if (shift_val < -63) {
2091       dst.SetRounding(i, src_is_negative);
2092       dst.SetInt(vform, i, src_is_negative ? -1 : 0);
2093     } else {
2094       // Use unsigned types for shifts, as behaviour is undefined for signed
2095       // lhs.
2096       uint64_t usrc_val = static_cast<uint64_t>(src_val);
2097 
2098       if (shift_val < 0) {
2099         // Convert to right shift.
2100         shift_val = -shift_val;
2101 
2102         // Set rounding state by testing most-significant bit shifted out.
2103         // Rounding only needed on right shifts.
2104         if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
2105           dst.SetRounding(i, true);
2106         }
2107 
2108         usrc_val >>= shift_val;
2109 
2110         if (src_is_negative) {
2111           // Simulate sign-extension.
2112           usrc_val |= (~UINT64_C(0) << (64 - shift_val));
2113         }
2114       } else {
2115         usrc_val <<= shift_val;
2116       }
2117       dst.SetUint(vform, i, usrc_val);
2118     }
2119   }
2120   return dst;
2121 }
2122 
2123 
ushl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2124 LogicVRegister Simulator::ushl(VectorFormat vform,
2125                                LogicVRegister dst,
2126                                const LogicVRegister& src1,
2127                                const LogicVRegister& src2) {
2128   dst.ClearForWrite(vform);
2129   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2130     int8_t shift_val = src2.Int(vform, i);
2131     uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
2132 
2133     // Set saturation state.
2134     if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
2135       dst.SetUnsignedSat(i, true);
2136     }
2137 
2138     uint64_t src_val = src1.Uint(vform, i);
2139     if ((shift_val > 63) || (shift_val < -64)) {
2140       dst.SetUint(vform, i, 0);
2141     } else {
2142       if (shift_val < 0) {
2143         // Set rounding state. Rounding only needed on right shifts.
2144         if (((src_val >> (-shift_val - 1)) & 1) == 1) {
2145           dst.SetRounding(i, true);
2146         }
2147 
2148         if (shift_val == -64) {
2149           src_val = 0;
2150         } else {
2151           src_val >>= -shift_val;
2152         }
2153       } else {
2154         src_val <<= shift_val;
2155       }
2156       dst.SetUint(vform, i, src_val);
2157     }
2158   }
2159   return dst;
2160 }
2161 
2162 
neg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2163 LogicVRegister Simulator::neg(VectorFormat vform,
2164                               LogicVRegister dst,
2165                               const LogicVRegister& src) {
2166   dst.ClearForWrite(vform);
2167   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2168     // Test for signed saturation.
2169     int64_t sa = src.Int(vform, i);
2170     if (sa == MinIntFromFormat(vform)) {
2171       dst.SetSignedSat(i, true);
2172     }
2173     dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2174   }
2175   return dst;
2176 }
2177 
2178 
suqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2179 LogicVRegister Simulator::suqadd(VectorFormat vform,
2180                                  LogicVRegister dst,
2181                                  const LogicVRegister& src) {
2182   dst.ClearForWrite(vform);
2183   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2184     int64_t sa = dst.IntLeftJustified(vform, i);
2185     uint64_t ub = src.UintLeftJustified(vform, i);
2186     uint64_t ur = sa + ub;
2187 
2188     int64_t sr;
2189     memcpy(&sr, &ur, sizeof(sr));
2190     if (sr < sa) {  // Test for signed positive saturation.
2191       dst.SetInt(vform, i, MaxIntFromFormat(vform));
2192     } else {
2193       dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));
2194     }
2195   }
2196   return dst;
2197 }
2198 
2199 
usqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2200 LogicVRegister Simulator::usqadd(VectorFormat vform,
2201                                  LogicVRegister dst,
2202                                  const LogicVRegister& src) {
2203   dst.ClearForWrite(vform);
2204   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2205     uint64_t ua = dst.UintLeftJustified(vform, i);
2206     int64_t sb = src.IntLeftJustified(vform, i);
2207     uint64_t ur = ua + sb;
2208 
2209     if ((sb > 0) && (ur <= ua)) {
2210       dst.SetUint(vform, i, MaxUintFromFormat(vform));  // Positive saturation.
2211     } else if ((sb < 0) && (ur >= ua)) {
2212       dst.SetUint(vform, i, 0);  // Negative saturation.
2213     } else {
2214       dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
2215     }
2216   }
2217   return dst;
2218 }
2219 
2220 
abs(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2221 LogicVRegister Simulator::abs(VectorFormat vform,
2222                               LogicVRegister dst,
2223                               const LogicVRegister& src) {
2224   dst.ClearForWrite(vform);
2225   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2226     // Test for signed saturation.
2227     int64_t sa = src.Int(vform, i);
2228     if (sa == MinIntFromFormat(vform)) {
2229       dst.SetSignedSat(i, true);
2230     }
2231     if (sa < 0) {
2232       dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2233     } else {
2234       dst.SetInt(vform, i, sa);
2235     }
2236   }
2237   return dst;
2238 }
2239 
2240 
andv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2241 LogicVRegister Simulator::andv(VectorFormat vform,
2242                                LogicVRegister dst,
2243                                const LogicPRegister& pg,
2244                                const LogicVRegister& src) {
2245   VIXL_ASSERT(IsSVEFormat(vform));
2246   uint64_t result = GetUintMask(LaneSizeInBitsFromFormat(vform));
2247   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2248     if (!pg.IsActive(vform, i)) continue;
2249 
2250     result &= src.Uint(vform, i);
2251   }
2252   VectorFormat vform_dst =
2253       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2254   dst.ClearForWrite(vform_dst);
2255   dst.SetUint(vform_dst, 0, result);
2256   return dst;
2257 }
2258 
2259 
eorv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2260 LogicVRegister Simulator::eorv(VectorFormat vform,
2261                                LogicVRegister dst,
2262                                const LogicPRegister& pg,
2263                                const LogicVRegister& src) {
2264   VIXL_ASSERT(IsSVEFormat(vform));
2265   uint64_t result = 0;
2266   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2267     if (!pg.IsActive(vform, i)) continue;
2268 
2269     result ^= src.Uint(vform, i);
2270   }
2271   VectorFormat vform_dst =
2272       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2273   dst.ClearForWrite(vform_dst);
2274   dst.SetUint(vform_dst, 0, result);
2275   return dst;
2276 }
2277 
2278 
orv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2279 LogicVRegister Simulator::orv(VectorFormat vform,
2280                               LogicVRegister dst,
2281                               const LogicPRegister& pg,
2282                               const LogicVRegister& src) {
2283   VIXL_ASSERT(IsSVEFormat(vform));
2284   uint64_t result = 0;
2285   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2286     if (!pg.IsActive(vform, i)) continue;
2287 
2288     result |= src.Uint(vform, i);
2289   }
2290   VectorFormat vform_dst =
2291       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2292   dst.ClearForWrite(vform_dst);
2293   dst.SetUint(vform_dst, 0, result);
2294   return dst;
2295 }
2296 
2297 
saddv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2298 LogicVRegister Simulator::saddv(VectorFormat vform,
2299                                 LogicVRegister dst,
2300                                 const LogicPRegister& pg,
2301                                 const LogicVRegister& src) {
2302   VIXL_ASSERT(IsSVEFormat(vform));
2303   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) <= kSRegSize);
2304   int64_t result = 0;
2305   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2306     if (!pg.IsActive(vform, i)) continue;
2307 
2308     // The destination register always has D-lane sizes and the source register
2309     // always has S-lanes or smaller, so signed integer overflow -- undefined
2310     // behaviour -- can't occur.
2311     result += src.Int(vform, i);
2312   }
2313 
2314   dst.ClearForWrite(kFormatD);
2315   dst.SetInt(kFormatD, 0, result);
2316   return dst;
2317 }
2318 
2319 
uaddv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2320 LogicVRegister Simulator::uaddv(VectorFormat vform,
2321                                 LogicVRegister dst,
2322                                 const LogicPRegister& pg,
2323                                 const LogicVRegister& src) {
2324   VIXL_ASSERT(IsSVEFormat(vform));
2325   uint64_t result = 0;
2326   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2327     if (!pg.IsActive(vform, i)) continue;
2328 
2329     result += src.Uint(vform, i);
2330   }
2331 
2332   dst.ClearForWrite(kFormatD);
2333   dst.SetUint(kFormatD, 0, result);
2334   return dst;
2335 }
2336 
2337 
extractnarrow(VectorFormat dstform,LogicVRegister dst,bool dst_is_signed,const LogicVRegister & src,bool src_is_signed)2338 LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
2339                                         LogicVRegister dst,
2340                                         bool dst_is_signed,
2341                                         const LogicVRegister& src,
2342                                         bool src_is_signed) {
2343   bool upperhalf = false;
2344   VectorFormat srcform = kFormatUndefined;
2345   int64_t ssrc[8];
2346   uint64_t usrc[8];
2347 
2348   switch (dstform) {
2349     case kFormat8B:
2350       upperhalf = false;
2351       srcform = kFormat8H;
2352       break;
2353     case kFormat16B:
2354       upperhalf = true;
2355       srcform = kFormat8H;
2356       break;
2357     case kFormat4H:
2358       upperhalf = false;
2359       srcform = kFormat4S;
2360       break;
2361     case kFormat8H:
2362       upperhalf = true;
2363       srcform = kFormat4S;
2364       break;
2365     case kFormat2S:
2366       upperhalf = false;
2367       srcform = kFormat2D;
2368       break;
2369     case kFormat4S:
2370       upperhalf = true;
2371       srcform = kFormat2D;
2372       break;
2373     case kFormatB:
2374       upperhalf = false;
2375       srcform = kFormatH;
2376       break;
2377     case kFormatH:
2378       upperhalf = false;
2379       srcform = kFormatS;
2380       break;
2381     case kFormatS:
2382       upperhalf = false;
2383       srcform = kFormatD;
2384       break;
2385     default:
2386       VIXL_UNIMPLEMENTED();
2387   }
2388 
2389   for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2390     ssrc[i] = src.Int(srcform, i);
2391     usrc[i] = src.Uint(srcform, i);
2392   }
2393 
2394   int offset;
2395   if (upperhalf) {
2396     offset = LaneCountFromFormat(dstform) / 2;
2397   } else {
2398     offset = 0;
2399     dst.ClearForWrite(dstform);
2400   }
2401 
2402   for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2403     // Test for signed saturation
2404     if (ssrc[i] > MaxIntFromFormat(dstform)) {
2405       dst.SetSignedSat(offset + i, true);
2406     } else if (ssrc[i] < MinIntFromFormat(dstform)) {
2407       dst.SetSignedSat(offset + i, false);
2408     }
2409 
2410     // Test for unsigned saturation
2411     if (src_is_signed) {
2412       if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
2413         dst.SetUnsignedSat(offset + i, true);
2414       } else if (ssrc[i] < 0) {
2415         dst.SetUnsignedSat(offset + i, false);
2416       }
2417     } else {
2418       if (usrc[i] > MaxUintFromFormat(dstform)) {
2419         dst.SetUnsignedSat(offset + i, true);
2420       }
2421     }
2422 
2423     int64_t result;
2424     if (src_is_signed) {
2425       result = ssrc[i] & MaxUintFromFormat(dstform);
2426     } else {
2427       result = usrc[i] & MaxUintFromFormat(dstform);
2428     }
2429 
2430     if (dst_is_signed) {
2431       dst.SetInt(dstform, offset + i, result);
2432     } else {
2433       dst.SetUint(dstform, offset + i, result);
2434     }
2435   }
2436   return dst;
2437 }
2438 
2439 
xtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2440 LogicVRegister Simulator::xtn(VectorFormat vform,
2441                               LogicVRegister dst,
2442                               const LogicVRegister& src) {
2443   return extractnarrow(vform, dst, true, src, true);
2444 }
2445 
2446 
sqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2447 LogicVRegister Simulator::sqxtn(VectorFormat vform,
2448                                 LogicVRegister dst,
2449                                 const LogicVRegister& src) {
2450   return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
2451 }
2452 
2453 
sqxtun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2454 LogicVRegister Simulator::sqxtun(VectorFormat vform,
2455                                  LogicVRegister dst,
2456                                  const LogicVRegister& src) {
2457   return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
2458 }
2459 
2460 
uqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2461 LogicVRegister Simulator::uqxtn(VectorFormat vform,
2462                                 LogicVRegister dst,
2463                                 const LogicVRegister& src) {
2464   return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
2465 }
2466 
2467 
absdiff(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_signed)2468 LogicVRegister Simulator::absdiff(VectorFormat vform,
2469                                   LogicVRegister dst,
2470                                   const LogicVRegister& src1,
2471                                   const LogicVRegister& src2,
2472                                   bool is_signed) {
2473   dst.ClearForWrite(vform);
2474   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2475     bool src1_gt_src2 = is_signed ? (src1.Int(vform, i) > src2.Int(vform, i))
2476                                   : (src1.Uint(vform, i) > src2.Uint(vform, i));
2477     // Always calculate the answer using unsigned arithmetic, to avoid
2478     // implemenation-defined signed overflow.
2479     if (src1_gt_src2) {
2480       dst.SetUint(vform, i, src1.Uint(vform, i) - src2.Uint(vform, i));
2481     } else {
2482       dst.SetUint(vform, i, src2.Uint(vform, i) - src1.Uint(vform, i));
2483     }
2484   }
2485   return dst;
2486 }
2487 
2488 
saba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2489 LogicVRegister Simulator::saba(VectorFormat vform,
2490                                LogicVRegister dst,
2491                                const LogicVRegister& src1,
2492                                const LogicVRegister& src2) {
2493   SimVRegister temp;
2494   dst.ClearForWrite(vform);
2495   absdiff(vform, temp, src1, src2, true);
2496   add(vform, dst, dst, temp);
2497   return dst;
2498 }
2499 
2500 
uaba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2501 LogicVRegister Simulator::uaba(VectorFormat vform,
2502                                LogicVRegister dst,
2503                                const LogicVRegister& src1,
2504                                const LogicVRegister& src2) {
2505   SimVRegister temp;
2506   dst.ClearForWrite(vform);
2507   absdiff(vform, temp, src1, src2, false);
2508   add(vform, dst, dst, temp);
2509   return dst;
2510 }
2511 
2512 
not_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2513 LogicVRegister Simulator::not_(VectorFormat vform,
2514                                LogicVRegister dst,
2515                                const LogicVRegister& src) {
2516   dst.ClearForWrite(vform);
2517   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2518     dst.SetUint(vform, i, ~src.Uint(vform, i));
2519   }
2520   return dst;
2521 }
2522 
2523 
rbit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2524 LogicVRegister Simulator::rbit(VectorFormat vform,
2525                                LogicVRegister dst,
2526                                const LogicVRegister& src) {
2527   uint64_t result[kZRegMaxSizeInBytes];
2528   int lane_count = LaneCountFromFormat(vform);
2529   int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
2530   uint64_t reversed_value;
2531   uint64_t value;
2532   for (int i = 0; i < lane_count; i++) {
2533     value = src.Uint(vform, i);
2534     reversed_value = 0;
2535     for (int j = 0; j < lane_size_in_bits; j++) {
2536       reversed_value = (reversed_value << 1) | (value & 1);
2537       value >>= 1;
2538     }
2539     result[i] = reversed_value;
2540   }
2541 
2542   dst.ClearForWrite(vform);
2543   for (int i = 0; i < lane_count; ++i) {
2544     dst.SetUint(vform, i, result[i]);
2545   }
2546   return dst;
2547 }
2548 
2549 
rev(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2550 LogicVRegister Simulator::rev(VectorFormat vform,
2551                               LogicVRegister dst,
2552                               const LogicVRegister& src) {
2553   VIXL_ASSERT(IsSVEFormat(vform));
2554   int lane_count = LaneCountFromFormat(vform);
2555   for (int i = 0; i < lane_count / 2; i++) {
2556     uint64_t t = src.Uint(vform, i);
2557     dst.SetUint(vform, i, src.Uint(vform, lane_count - i - 1));
2558     dst.SetUint(vform, lane_count - i - 1, t);
2559   }
2560   return dst;
2561 }
2562 
2563 
rev_byte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int rev_size)2564 LogicVRegister Simulator::rev_byte(VectorFormat vform,
2565                                    LogicVRegister dst,
2566                                    const LogicVRegister& src,
2567                                    int rev_size) {
2568   uint64_t result[kZRegMaxSizeInBytes];
2569   int lane_count = LaneCountFromFormat(vform);
2570   int lane_size = LaneSizeInBytesFromFormat(vform);
2571   int lanes_per_loop = rev_size / lane_size;
2572   for (int i = 0; i < lane_count; i += lanes_per_loop) {
2573     for (int j = 0; j < lanes_per_loop; j++) {
2574       result[i + lanes_per_loop - 1 - j] = src.Uint(vform, i + j);
2575     }
2576   }
2577   dst.ClearForWrite(vform);
2578   for (int i = 0; i < lane_count; ++i) {
2579     dst.SetUint(vform, i, result[i]);
2580   }
2581   return dst;
2582 }
2583 
2584 
rev16(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2585 LogicVRegister Simulator::rev16(VectorFormat vform,
2586                                 LogicVRegister dst,
2587                                 const LogicVRegister& src) {
2588   return rev_byte(vform, dst, src, 2);
2589 }
2590 
2591 
rev32(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2592 LogicVRegister Simulator::rev32(VectorFormat vform,
2593                                 LogicVRegister dst,
2594                                 const LogicVRegister& src) {
2595   return rev_byte(vform, dst, src, 4);
2596 }
2597 
2598 
rev64(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2599 LogicVRegister Simulator::rev64(VectorFormat vform,
2600                                 LogicVRegister dst,
2601                                 const LogicVRegister& src) {
2602   return rev_byte(vform, dst, src, 8);
2603 }
2604 
2605 
addlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_signed,bool do_accumulate)2606 LogicVRegister Simulator::addlp(VectorFormat vform,
2607                                 LogicVRegister dst,
2608                                 const LogicVRegister& src,
2609                                 bool is_signed,
2610                                 bool do_accumulate) {
2611   VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
2612   VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= 32);
2613   VIXL_ASSERT(LaneCountFromFormat(vform) <= 8);
2614 
2615   uint64_t result[8];
2616   int lane_count = LaneCountFromFormat(vform);
2617   for (int i = 0; i < lane_count; i++) {
2618     if (is_signed) {
2619       result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
2620                                         src.Int(vformsrc, 2 * i + 1));
2621     } else {
2622       result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
2623     }
2624   }
2625 
2626   dst.ClearForWrite(vform);
2627   for (int i = 0; i < lane_count; ++i) {
2628     if (do_accumulate) {
2629       result[i] += dst.Uint(vform, i);
2630     }
2631     dst.SetUint(vform, i, result[i]);
2632   }
2633 
2634   return dst;
2635 }
2636 
2637 
saddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2638 LogicVRegister Simulator::saddlp(VectorFormat vform,
2639                                  LogicVRegister dst,
2640                                  const LogicVRegister& src) {
2641   return addlp(vform, dst, src, true, false);
2642 }
2643 
2644 
uaddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2645 LogicVRegister Simulator::uaddlp(VectorFormat vform,
2646                                  LogicVRegister dst,
2647                                  const LogicVRegister& src) {
2648   return addlp(vform, dst, src, false, false);
2649 }
2650 
2651 
sadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2652 LogicVRegister Simulator::sadalp(VectorFormat vform,
2653                                  LogicVRegister dst,
2654                                  const LogicVRegister& src) {
2655   return addlp(vform, dst, src, true, true);
2656 }
2657 
2658 
uadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2659 LogicVRegister Simulator::uadalp(VectorFormat vform,
2660                                  LogicVRegister dst,
2661                                  const LogicVRegister& src) {
2662   return addlp(vform, dst, src, false, true);
2663 }
2664 
2665 
ext(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)2666 LogicVRegister Simulator::ext(VectorFormat vform,
2667                               LogicVRegister dst,
2668                               const LogicVRegister& src1,
2669                               const LogicVRegister& src2,
2670                               int index) {
2671   uint8_t result[kZRegMaxSizeInBytes];
2672   int lane_count = LaneCountFromFormat(vform);
2673   for (int i = 0; i < lane_count - index; ++i) {
2674     result[i] = src1.Uint(vform, i + index);
2675   }
2676   for (int i = 0; i < index; ++i) {
2677     result[lane_count - index + i] = src2.Uint(vform, i);
2678   }
2679   dst.ClearForWrite(vform);
2680   for (int i = 0; i < lane_count; ++i) {
2681     dst.SetUint(vform, i, result[i]);
2682   }
2683   return dst;
2684 }
2685 
2686 template <typename T>
fadda(VectorFormat vform,LogicVRegister acc,const LogicPRegister & pg,const LogicVRegister & src)2687 LogicVRegister Simulator::fadda(VectorFormat vform,
2688                                 LogicVRegister acc,
2689                                 const LogicPRegister& pg,
2690                                 const LogicVRegister& src) {
2691   T result = acc.Float<T>(0);
2692   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2693     if (!pg.IsActive(vform, i)) continue;
2694 
2695     result = FPAdd(result, src.Float<T>(i));
2696   }
2697   VectorFormat vform_dst =
2698       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2699   acc.ClearForWrite(vform_dst);
2700   acc.SetFloat(0, result);
2701   return acc;
2702 }
2703 
fadda(VectorFormat vform,LogicVRegister acc,const LogicPRegister & pg,const LogicVRegister & src)2704 LogicVRegister Simulator::fadda(VectorFormat vform,
2705                                 LogicVRegister acc,
2706                                 const LogicPRegister& pg,
2707                                 const LogicVRegister& src) {
2708   switch (LaneSizeInBitsFromFormat(vform)) {
2709     case kHRegSize:
2710       fadda<SimFloat16>(vform, acc, pg, src);
2711       break;
2712     case kSRegSize:
2713       fadda<float>(vform, acc, pg, src);
2714       break;
2715     case kDRegSize:
2716       fadda<double>(vform, acc, pg, src);
2717       break;
2718     default:
2719       VIXL_UNREACHABLE();
2720   }
2721   return acc;
2722 }
2723 
2724 template <typename T>
fcadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2725 LogicVRegister Simulator::fcadd(VectorFormat vform,
2726                                 LogicVRegister dst,          // d
2727                                 const LogicVRegister& src1,  // n
2728                                 const LogicVRegister& src2,  // m
2729                                 int rot) {
2730   int elements = LaneCountFromFormat(vform);
2731 
2732   T element1, element3;
2733   rot = (rot == 1) ? 270 : 90;
2734 
2735   // Loop example:
2736   // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2737   // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2738 
2739   for (int e = 0; e <= (elements / 2) - 1; e++) {
2740     switch (rot) {
2741       case 90:
2742         element1 = FPNeg(src2.Float<T>(e * 2 + 1));
2743         element3 = src2.Float<T>(e * 2);
2744         break;
2745       case 270:
2746         element1 = src2.Float<T>(e * 2 + 1);
2747         element3 = FPNeg(src2.Float<T>(e * 2));
2748         break;
2749       default:
2750         VIXL_UNREACHABLE();
2751         return dst;  // prevents "element(n) may be unintialized" errors
2752     }
2753     dst.ClearForWrite(vform);
2754     dst.SetFloat<T>(e * 2, FPAdd(src1.Float<T>(e * 2), element1));
2755     dst.SetFloat<T>(e * 2 + 1, FPAdd(src1.Float<T>(e * 2 + 1), element3));
2756   }
2757   return dst;
2758 }
2759 
2760 
fcadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2761 LogicVRegister Simulator::fcadd(VectorFormat vform,
2762                                 LogicVRegister dst,          // d
2763                                 const LogicVRegister& src1,  // n
2764                                 const LogicVRegister& src2,  // m
2765                                 int rot) {
2766   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2767     fcadd<SimFloat16>(vform, dst, src1, src2, rot);
2768   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2769     fcadd<float>(vform, dst, src1, src2, rot);
2770   } else {
2771     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
2772     fcadd<double>(vform, dst, src1, src2, rot);
2773   }
2774   return dst;
2775 }
2776 
2777 template <typename T>
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,const LogicVRegister & acc,int index,int rot)2778 LogicVRegister Simulator::fcmla(VectorFormat vform,
2779                                 LogicVRegister dst,
2780                                 const LogicVRegister& src1,
2781                                 const LogicVRegister& src2,
2782                                 const LogicVRegister& acc,
2783                                 int index,
2784                                 int rot) {
2785   int elements = LaneCountFromFormat(vform);
2786 
2787   T element1, element2, element3, element4;
2788   rot *= 90;
2789 
2790   // Loop example:
2791   // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2792   // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2793 
2794   for (int e = 0; e <= (elements / 2) - 1; e++) {
2795     // Index == -1 indicates a vector/vector rather than vector/indexed-element
2796     // operation.
2797     int f = (index < 0) ? e : index;
2798 
2799     switch (rot) {
2800       case 0:
2801         element1 = src2.Float<T>(f * 2);
2802         element2 = src1.Float<T>(e * 2);
2803         element3 = src2.Float<T>(f * 2 + 1);
2804         element4 = src1.Float<T>(e * 2);
2805         break;
2806       case 90:
2807         element1 = FPNeg(src2.Float<T>(f * 2 + 1));
2808         element2 = src1.Float<T>(e * 2 + 1);
2809         element3 = src2.Float<T>(f * 2);
2810         element4 = src1.Float<T>(e * 2 + 1);
2811         break;
2812       case 180:
2813         element1 = FPNeg(src2.Float<T>(f * 2));
2814         element2 = src1.Float<T>(e * 2);
2815         element3 = FPNeg(src2.Float<T>(f * 2 + 1));
2816         element4 = src1.Float<T>(e * 2);
2817         break;
2818       case 270:
2819         element1 = src2.Float<T>(f * 2 + 1);
2820         element2 = src1.Float<T>(e * 2 + 1);
2821         element3 = FPNeg(src2.Float<T>(f * 2));
2822         element4 = src1.Float<T>(e * 2 + 1);
2823         break;
2824       default:
2825         VIXL_UNREACHABLE();
2826         return dst;  // prevents "element(n) may be unintialized" errors
2827     }
2828     dst.ClearForWrite(vform);
2829     dst.SetFloat<T>(vform,
2830                     e * 2,
2831                     FPMulAdd(acc.Float<T>(e * 2), element2, element1));
2832     dst.SetFloat<T>(vform,
2833                     e * 2 + 1,
2834                     FPMulAdd(acc.Float<T>(e * 2 + 1), element4, element3));
2835   }
2836   return dst;
2837 }
2838 
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,const LogicVRegister & acc,int rot)2839 LogicVRegister Simulator::fcmla(VectorFormat vform,
2840                                 LogicVRegister dst,
2841                                 const LogicVRegister& src1,
2842                                 const LogicVRegister& src2,
2843                                 const LogicVRegister& acc,
2844                                 int rot) {
2845   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2846     fcmla<SimFloat16>(vform, dst, src1, src2, acc, -1, rot);
2847   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2848     fcmla<float>(vform, dst, src1, src2, acc, -1, rot);
2849   } else {
2850     fcmla<double>(vform, dst, src1, src2, acc, -1, rot);
2851   }
2852   return dst;
2853 }
2854 
2855 
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)2856 LogicVRegister Simulator::fcmla(VectorFormat vform,
2857                                 LogicVRegister dst,          // d
2858                                 const LogicVRegister& src1,  // n
2859                                 const LogicVRegister& src2,  // m
2860                                 int index,
2861                                 int rot) {
2862   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2863     VIXL_UNIMPLEMENTED();
2864   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2865     fcmla<float>(vform, dst, src1, src2, dst, index, rot);
2866   } else {
2867     fcmla<double>(vform, dst, src1, src2, dst, index, rot);
2868   }
2869   return dst;
2870 }
2871 
2872 
dup_element(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2873 LogicVRegister Simulator::dup_element(VectorFormat vform,
2874                                       LogicVRegister dst,
2875                                       const LogicVRegister& src,
2876                                       int src_index) {
2877   if (vform == kFormatVnQ) {
2878     // When duplicating a 128-bit value, split it into two 64-bit parts, and
2879     // then copy the two to their slots on destination register.
2880     uint64_t low = src.Uint(kFormatVnD, src_index * 2);
2881     uint64_t high = src.Uint(kFormatVnD, (src_index * 2) + 1);
2882     dst.ClearForWrite(vform);
2883     for (int d_lane = 0; d_lane < LaneCountFromFormat(kFormatVnD);
2884          d_lane += 2) {
2885       dst.SetUint(kFormatVnD, d_lane, low);
2886       dst.SetUint(kFormatVnD, d_lane + 1, high);
2887     }
2888   } else {
2889     int lane_count = LaneCountFromFormat(vform);
2890     uint64_t value = src.Uint(vform, src_index);
2891     dst.ClearForWrite(vform);
2892     for (int i = 0; i < lane_count; ++i) {
2893       dst.SetUint(vform, i, value);
2894     }
2895   }
2896   return dst;
2897 }
2898 
dup_elements_to_segments(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2899 LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform,
2900                                                    LogicVRegister dst,
2901                                                    const LogicVRegister& src,
2902                                                    int src_index) {
2903   // In SVE, a segment is a 128-bit portion of a vector, like a Q register,
2904   // whereas in NEON, the size of segment is equal to the size of register
2905   // itself.
2906   int segment_size = std::min(kQRegSize, RegisterSizeInBitsFromFormat(vform));
2907   VIXL_ASSERT(IsMultiple(segment_size, LaneSizeInBitsFromFormat(vform)));
2908   int lanes_per_segment = segment_size / LaneSizeInBitsFromFormat(vform);
2909 
2910   VIXL_ASSERT(src_index >= 0);
2911   VIXL_ASSERT(src_index < lanes_per_segment);
2912 
2913   dst.ClearForWrite(vform);
2914   for (int j = 0; j < LaneCountFromFormat(vform); j += lanes_per_segment) {
2915     uint64_t value = src.Uint(vform, j + src_index);
2916     for (int i = 0; i < lanes_per_segment; i++) {
2917       dst.SetUint(vform, j + i, value);
2918     }
2919   }
2920   return dst;
2921 }
2922 
dup_immediate(VectorFormat vform,LogicVRegister dst,uint64_t imm)2923 LogicVRegister Simulator::dup_immediate(VectorFormat vform,
2924                                         LogicVRegister dst,
2925                                         uint64_t imm) {
2926   int lane_count = LaneCountFromFormat(vform);
2927   uint64_t value = imm & MaxUintFromFormat(vform);
2928   dst.ClearForWrite(vform);
2929   for (int i = 0; i < lane_count; ++i) {
2930     dst.SetUint(vform, i, value);
2931   }
2932   return dst;
2933 }
2934 
2935 
ins_element(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,int src_index)2936 LogicVRegister Simulator::ins_element(VectorFormat vform,
2937                                       LogicVRegister dst,
2938                                       int dst_index,
2939                                       const LogicVRegister& src,
2940                                       int src_index) {
2941   dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2942   return dst;
2943 }
2944 
2945 
ins_immediate(VectorFormat vform,LogicVRegister dst,int dst_index,uint64_t imm)2946 LogicVRegister Simulator::ins_immediate(VectorFormat vform,
2947                                         LogicVRegister dst,
2948                                         int dst_index,
2949                                         uint64_t imm) {
2950   uint64_t value = imm & MaxUintFromFormat(vform);
2951   dst.SetUint(vform, dst_index, value);
2952   return dst;
2953 }
2954 
2955 
index(VectorFormat vform,LogicVRegister dst,uint64_t start,uint64_t step)2956 LogicVRegister Simulator::index(VectorFormat vform,
2957                                 LogicVRegister dst,
2958                                 uint64_t start,
2959                                 uint64_t step) {
2960   VIXL_ASSERT(IsSVEFormat(vform));
2961   uint64_t value = start;
2962   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2963     dst.SetUint(vform, i, value);
2964     value += step;
2965   }
2966   return dst;
2967 }
2968 
2969 
insr(VectorFormat vform,LogicVRegister dst,uint64_t imm)2970 LogicVRegister Simulator::insr(VectorFormat vform,
2971                                LogicVRegister dst,
2972                                uint64_t imm) {
2973   VIXL_ASSERT(IsSVEFormat(vform));
2974   for (int i = LaneCountFromFormat(vform) - 1; i > 0; i--) {
2975     dst.SetUint(vform, i, dst.Uint(vform, i - 1));
2976   }
2977   dst.SetUint(vform, 0, imm);
2978   return dst;
2979 }
2980 
2981 
mov(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2982 LogicVRegister Simulator::mov(VectorFormat vform,
2983                               LogicVRegister dst,
2984                               const LogicVRegister& src) {
2985   dst.ClearForWrite(vform);
2986   for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
2987     dst.SetUint(vform, lane, src.Uint(vform, lane));
2988   }
2989   return dst;
2990 }
2991 
2992 
mov(LogicPRegister dst,const LogicPRegister & src)2993 LogicPRegister Simulator::mov(LogicPRegister dst, const LogicPRegister& src) {
2994   // Avoid a copy if the registers already alias.
2995   if (dst.Aliases(src)) return dst;
2996 
2997   for (int i = 0; i < dst.GetChunkCount(); i++) {
2998     dst.SetChunk(i, src.GetChunk(i));
2999   }
3000   return dst;
3001 }
3002 
3003 
mov_merging(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src)3004 LogicVRegister Simulator::mov_merging(VectorFormat vform,
3005                                       LogicVRegister dst,
3006                                       const SimPRegister& pg,
3007                                       const LogicVRegister& src) {
3008   return sel(vform, dst, pg, src, dst);
3009 }
3010 
3011 
mov_zeroing(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src)3012 LogicVRegister Simulator::mov_zeroing(VectorFormat vform,
3013                                       LogicVRegister dst,
3014                                       const SimPRegister& pg,
3015                                       const LogicVRegister& src) {
3016   SimVRegister zero;
3017   dup_immediate(vform, zero, 0);
3018   return sel(vform, dst, pg, src, zero);
3019 }
3020 
3021 
mov_merging(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)3022 LogicPRegister Simulator::mov_merging(LogicPRegister dst,
3023                                       const LogicPRegister& pg,
3024                                       const LogicPRegister& src) {
3025   return sel(dst, pg, src, dst);
3026 }
3027 
3028 
mov_zeroing(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)3029 LogicPRegister Simulator::mov_zeroing(LogicPRegister dst,
3030                                       const LogicPRegister& pg,
3031                                       const LogicPRegister& src) {
3032   SimPRegister all_false;
3033   return sel(dst, pg, src, pfalse(all_false));
3034 }
3035 
3036 
movi(VectorFormat vform,LogicVRegister dst,uint64_t imm)3037 LogicVRegister Simulator::movi(VectorFormat vform,
3038                                LogicVRegister dst,
3039                                uint64_t imm) {
3040   int lane_count = LaneCountFromFormat(vform);
3041   dst.ClearForWrite(vform);
3042   for (int i = 0; i < lane_count; ++i) {
3043     dst.SetUint(vform, i, imm);
3044   }
3045   return dst;
3046 }
3047 
3048 
mvni(VectorFormat vform,LogicVRegister dst,uint64_t imm)3049 LogicVRegister Simulator::mvni(VectorFormat vform,
3050                                LogicVRegister dst,
3051                                uint64_t imm) {
3052   int lane_count = LaneCountFromFormat(vform);
3053   dst.ClearForWrite(vform);
3054   for (int i = 0; i < lane_count; ++i) {
3055     dst.SetUint(vform, i, ~imm);
3056   }
3057   return dst;
3058 }
3059 
3060 
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)3061 LogicVRegister Simulator::orr(VectorFormat vform,
3062                               LogicVRegister dst,
3063                               const LogicVRegister& src,
3064                               uint64_t imm) {
3065   uint64_t result[16];
3066   int lane_count = LaneCountFromFormat(vform);
3067   for (int i = 0; i < lane_count; ++i) {
3068     result[i] = src.Uint(vform, i) | imm;
3069   }
3070   dst.ClearForWrite(vform);
3071   for (int i = 0; i < lane_count; ++i) {
3072     dst.SetUint(vform, i, result[i]);
3073   }
3074   return dst;
3075 }
3076 
3077 
uxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3078 LogicVRegister Simulator::uxtl(VectorFormat vform,
3079                                LogicVRegister dst,
3080                                const LogicVRegister& src) {
3081   VectorFormat vform_half = VectorFormatHalfWidth(vform);
3082 
3083   dst.ClearForWrite(vform);
3084   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3085     dst.SetUint(vform, i, src.Uint(vform_half, i));
3086   }
3087   return dst;
3088 }
3089 
3090 
sxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3091 LogicVRegister Simulator::sxtl(VectorFormat vform,
3092                                LogicVRegister dst,
3093                                const LogicVRegister& src) {
3094   VectorFormat vform_half = VectorFormatHalfWidth(vform);
3095 
3096   dst.ClearForWrite(vform);
3097   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3098     dst.SetInt(vform, i, src.Int(vform_half, i));
3099   }
3100   return dst;
3101 }
3102 
3103 
uxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3104 LogicVRegister Simulator::uxtl2(VectorFormat vform,
3105                                 LogicVRegister dst,
3106                                 const LogicVRegister& src) {
3107   VectorFormat vform_half = VectorFormatHalfWidth(vform);
3108   int lane_count = LaneCountFromFormat(vform);
3109 
3110   dst.ClearForWrite(vform);
3111   for (int i = 0; i < lane_count; i++) {
3112     dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
3113   }
3114   return dst;
3115 }
3116 
3117 
sxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3118 LogicVRegister Simulator::sxtl2(VectorFormat vform,
3119                                 LogicVRegister dst,
3120                                 const LogicVRegister& src) {
3121   VectorFormat vform_half = VectorFormatHalfWidth(vform);
3122   int lane_count = LaneCountFromFormat(vform);
3123 
3124   dst.ClearForWrite(vform);
3125   for (int i = 0; i < lane_count; i++) {
3126     dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
3127   }
3128   return dst;
3129 }
3130 
3131 
uxt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,unsigned from_size_in_bits)3132 LogicVRegister Simulator::uxt(VectorFormat vform,
3133                               LogicVRegister dst,
3134                               const LogicVRegister& src,
3135                               unsigned from_size_in_bits) {
3136   int lane_count = LaneCountFromFormat(vform);
3137   uint64_t mask = GetUintMask(from_size_in_bits);
3138 
3139   dst.ClearForWrite(vform);
3140   for (int i = 0; i < lane_count; i++) {
3141     dst.SetInt(vform, i, src.Uint(vform, i) & mask);
3142   }
3143   return dst;
3144 }
3145 
3146 
sxt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,unsigned from_size_in_bits)3147 LogicVRegister Simulator::sxt(VectorFormat vform,
3148                               LogicVRegister dst,
3149                               const LogicVRegister& src,
3150                               unsigned from_size_in_bits) {
3151   int lane_count = LaneCountFromFormat(vform);
3152 
3153   dst.ClearForWrite(vform);
3154   for (int i = 0; i < lane_count; i++) {
3155     uint64_t value =
3156         ExtractSignedBitfield64(from_size_in_bits - 1, 0, src.Uint(vform, i));
3157     dst.SetInt(vform, i, value);
3158   }
3159   return dst;
3160 }
3161 
3162 
shrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3163 LogicVRegister Simulator::shrn(VectorFormat vform,
3164                                LogicVRegister dst,
3165                                const LogicVRegister& src,
3166                                int shift) {
3167   SimVRegister temp;
3168   VectorFormat vform_src = VectorFormatDoubleWidth(vform);
3169   VectorFormat vform_dst = vform;
3170   LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
3171   return extractnarrow(vform_dst, dst, false, shifted_src, false);
3172 }
3173 
3174 
shrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3175 LogicVRegister Simulator::shrn2(VectorFormat vform,
3176                                 LogicVRegister dst,
3177                                 const LogicVRegister& src,
3178                                 int shift) {
3179   SimVRegister temp;
3180   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3181   VectorFormat vformdst = vform;
3182   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
3183   return extractnarrow(vformdst, dst, false, shifted_src, false);
3184 }
3185 
3186 
rshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3187 LogicVRegister Simulator::rshrn(VectorFormat vform,
3188                                 LogicVRegister dst,
3189                                 const LogicVRegister& src,
3190                                 int shift) {
3191   SimVRegister temp;
3192   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3193   VectorFormat vformdst = vform;
3194   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3195   return extractnarrow(vformdst, dst, false, shifted_src, false);
3196 }
3197 
3198 
rshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3199 LogicVRegister Simulator::rshrn2(VectorFormat vform,
3200                                  LogicVRegister dst,
3201                                  const LogicVRegister& src,
3202                                  int shift) {
3203   SimVRegister temp;
3204   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3205   VectorFormat vformdst = vform;
3206   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3207   return extractnarrow(vformdst, dst, false, shifted_src, false);
3208 }
3209 
Table(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)3210 LogicVRegister Simulator::Table(VectorFormat vform,
3211                                 LogicVRegister dst,
3212                                 const LogicVRegister& tab,
3213                                 const LogicVRegister& ind) {
3214   VIXL_ASSERT(IsSVEFormat(vform));
3215   int lane_count = LaneCountFromFormat(vform);
3216   for (int i = 0; i < lane_count; i++) {
3217     uint64_t index = ind.Uint(vform, i);
3218     uint64_t value = (index >= static_cast<uint64_t>(lane_count))
3219                          ? 0
3220                          : tab.Uint(vform, static_cast<int>(index));
3221     dst.SetUint(vform, i, value);
3222   }
3223   return dst;
3224 }
3225 
3226 
Table(VectorFormat vform,LogicVRegister dst,const LogicVRegister & ind,bool zero_out_of_bounds,const LogicVRegister * tab1,const LogicVRegister * tab2,const LogicVRegister * tab3,const LogicVRegister * tab4)3227 LogicVRegister Simulator::Table(VectorFormat vform,
3228                                 LogicVRegister dst,
3229                                 const LogicVRegister& ind,
3230                                 bool zero_out_of_bounds,
3231                                 const LogicVRegister* tab1,
3232                                 const LogicVRegister* tab2,
3233                                 const LogicVRegister* tab3,
3234                                 const LogicVRegister* tab4) {
3235   VIXL_ASSERT(tab1 != NULL);
3236   const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
3237   uint64_t result[kMaxLanesPerVector];
3238   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3239     result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
3240   }
3241   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3242     uint64_t j = ind.Uint(vform, i);
3243     int tab_idx = static_cast<int>(j >> 4);
3244     int j_idx = static_cast<int>(j & 15);
3245     if ((tab_idx < 4) && (tab[tab_idx] != NULL)) {
3246       result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
3247     }
3248   }
3249   dst.SetUintArray(vform, result);
3250   return dst;
3251 }
3252 
3253 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)3254 LogicVRegister Simulator::tbl(VectorFormat vform,
3255                               LogicVRegister dst,
3256                               const LogicVRegister& tab,
3257                               const LogicVRegister& ind) {
3258   return Table(vform, dst, ind, true, &tab);
3259 }
3260 
3261 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)3262 LogicVRegister Simulator::tbl(VectorFormat vform,
3263                               LogicVRegister dst,
3264                               const LogicVRegister& tab,
3265                               const LogicVRegister& tab2,
3266                               const LogicVRegister& ind) {
3267   return Table(vform, dst, ind, true, &tab, &tab2);
3268 }
3269 
3270 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)3271 LogicVRegister Simulator::tbl(VectorFormat vform,
3272                               LogicVRegister dst,
3273                               const LogicVRegister& tab,
3274                               const LogicVRegister& tab2,
3275                               const LogicVRegister& tab3,
3276                               const LogicVRegister& ind) {
3277   return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
3278 }
3279 
3280 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)3281 LogicVRegister Simulator::tbl(VectorFormat vform,
3282                               LogicVRegister dst,
3283                               const LogicVRegister& tab,
3284                               const LogicVRegister& tab2,
3285                               const LogicVRegister& tab3,
3286                               const LogicVRegister& tab4,
3287                               const LogicVRegister& ind) {
3288   return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
3289 }
3290 
3291 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)3292 LogicVRegister Simulator::tbx(VectorFormat vform,
3293                               LogicVRegister dst,
3294                               const LogicVRegister& tab,
3295                               const LogicVRegister& ind) {
3296   return Table(vform, dst, ind, false, &tab);
3297 }
3298 
3299 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)3300 LogicVRegister Simulator::tbx(VectorFormat vform,
3301                               LogicVRegister dst,
3302                               const LogicVRegister& tab,
3303                               const LogicVRegister& tab2,
3304                               const LogicVRegister& ind) {
3305   return Table(vform, dst, ind, false, &tab, &tab2);
3306 }
3307 
3308 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)3309 LogicVRegister Simulator::tbx(VectorFormat vform,
3310                               LogicVRegister dst,
3311                               const LogicVRegister& tab,
3312                               const LogicVRegister& tab2,
3313                               const LogicVRegister& tab3,
3314                               const LogicVRegister& ind) {
3315   return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
3316 }
3317 
3318 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)3319 LogicVRegister Simulator::tbx(VectorFormat vform,
3320                               LogicVRegister dst,
3321                               const LogicVRegister& tab,
3322                               const LogicVRegister& tab2,
3323                               const LogicVRegister& tab3,
3324                               const LogicVRegister& tab4,
3325                               const LogicVRegister& ind) {
3326   return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
3327 }
3328 
3329 
uqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3330 LogicVRegister Simulator::uqshrn(VectorFormat vform,
3331                                  LogicVRegister dst,
3332                                  const LogicVRegister& src,
3333                                  int shift) {
3334   return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
3335 }
3336 
3337 
uqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3338 LogicVRegister Simulator::uqshrn2(VectorFormat vform,
3339                                   LogicVRegister dst,
3340                                   const LogicVRegister& src,
3341                                   int shift) {
3342   return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3343 }
3344 
3345 
uqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3346 LogicVRegister Simulator::uqrshrn(VectorFormat vform,
3347                                   LogicVRegister dst,
3348                                   const LogicVRegister& src,
3349                                   int shift) {
3350   return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
3351 }
3352 
3353 
uqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3354 LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
3355                                    LogicVRegister dst,
3356                                    const LogicVRegister& src,
3357                                    int shift) {
3358   return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3359 }
3360 
3361 
sqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3362 LogicVRegister Simulator::sqshrn(VectorFormat vform,
3363                                  LogicVRegister dst,
3364                                  const LogicVRegister& src,
3365                                  int shift) {
3366   SimVRegister temp;
3367   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3368   VectorFormat vformdst = vform;
3369   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3370   return sqxtn(vformdst, dst, shifted_src);
3371 }
3372 
3373 
sqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3374 LogicVRegister Simulator::sqshrn2(VectorFormat vform,
3375                                   LogicVRegister dst,
3376                                   const LogicVRegister& src,
3377                                   int shift) {
3378   SimVRegister temp;
3379   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3380   VectorFormat vformdst = vform;
3381   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3382   return sqxtn(vformdst, dst, shifted_src);
3383 }
3384 
3385 
sqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3386 LogicVRegister Simulator::sqrshrn(VectorFormat vform,
3387                                   LogicVRegister dst,
3388                                   const LogicVRegister& src,
3389                                   int shift) {
3390   SimVRegister temp;
3391   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3392   VectorFormat vformdst = vform;
3393   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3394   return sqxtn(vformdst, dst, shifted_src);
3395 }
3396 
3397 
sqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3398 LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
3399                                    LogicVRegister dst,
3400                                    const LogicVRegister& src,
3401                                    int shift) {
3402   SimVRegister temp;
3403   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3404   VectorFormat vformdst = vform;
3405   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3406   return sqxtn(vformdst, dst, shifted_src);
3407 }
3408 
3409 
sqshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3410 LogicVRegister Simulator::sqshrun(VectorFormat vform,
3411                                   LogicVRegister dst,
3412                                   const LogicVRegister& src,
3413                                   int shift) {
3414   SimVRegister temp;
3415   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3416   VectorFormat vformdst = vform;
3417   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3418   return sqxtun(vformdst, dst, shifted_src);
3419 }
3420 
3421 
sqshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3422 LogicVRegister Simulator::sqshrun2(VectorFormat vform,
3423                                    LogicVRegister dst,
3424                                    const LogicVRegister& src,
3425                                    int shift) {
3426   SimVRegister temp;
3427   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3428   VectorFormat vformdst = vform;
3429   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3430   return sqxtun(vformdst, dst, shifted_src);
3431 }
3432 
3433 
sqrshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3434 LogicVRegister Simulator::sqrshrun(VectorFormat vform,
3435                                    LogicVRegister dst,
3436                                    const LogicVRegister& src,
3437                                    int shift) {
3438   SimVRegister temp;
3439   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3440   VectorFormat vformdst = vform;
3441   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3442   return sqxtun(vformdst, dst, shifted_src);
3443 }
3444 
3445 
sqrshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3446 LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
3447                                     LogicVRegister dst,
3448                                     const LogicVRegister& src,
3449                                     int shift) {
3450   SimVRegister temp;
3451   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3452   VectorFormat vformdst = vform;
3453   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3454   return sqxtun(vformdst, dst, shifted_src);
3455 }
3456 
3457 
uaddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3458 LogicVRegister Simulator::uaddl(VectorFormat vform,
3459                                 LogicVRegister dst,
3460                                 const LogicVRegister& src1,
3461                                 const LogicVRegister& src2) {
3462   SimVRegister temp1, temp2;
3463   uxtl(vform, temp1, src1);
3464   uxtl(vform, temp2, src2);
3465   add(vform, dst, temp1, temp2);
3466   return dst;
3467 }
3468 
3469 
uaddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3470 LogicVRegister Simulator::uaddl2(VectorFormat vform,
3471                                  LogicVRegister dst,
3472                                  const LogicVRegister& src1,
3473                                  const LogicVRegister& src2) {
3474   SimVRegister temp1, temp2;
3475   uxtl2(vform, temp1, src1);
3476   uxtl2(vform, temp2, src2);
3477   add(vform, dst, temp1, temp2);
3478   return dst;
3479 }
3480 
3481 
uaddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3482 LogicVRegister Simulator::uaddw(VectorFormat vform,
3483                                 LogicVRegister dst,
3484                                 const LogicVRegister& src1,
3485                                 const LogicVRegister& src2) {
3486   SimVRegister temp;
3487   uxtl(vform, temp, src2);
3488   add(vform, dst, src1, temp);
3489   return dst;
3490 }
3491 
3492 
uaddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3493 LogicVRegister Simulator::uaddw2(VectorFormat vform,
3494                                  LogicVRegister dst,
3495                                  const LogicVRegister& src1,
3496                                  const LogicVRegister& src2) {
3497   SimVRegister temp;
3498   uxtl2(vform, temp, src2);
3499   add(vform, dst, src1, temp);
3500   return dst;
3501 }
3502 
3503 
saddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3504 LogicVRegister Simulator::saddl(VectorFormat vform,
3505                                 LogicVRegister dst,
3506                                 const LogicVRegister& src1,
3507                                 const LogicVRegister& src2) {
3508   SimVRegister temp1, temp2;
3509   sxtl(vform, temp1, src1);
3510   sxtl(vform, temp2, src2);
3511   add(vform, dst, temp1, temp2);
3512   return dst;
3513 }
3514 
3515 
saddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3516 LogicVRegister Simulator::saddl2(VectorFormat vform,
3517                                  LogicVRegister dst,
3518                                  const LogicVRegister& src1,
3519                                  const LogicVRegister& src2) {
3520   SimVRegister temp1, temp2;
3521   sxtl2(vform, temp1, src1);
3522   sxtl2(vform, temp2, src2);
3523   add(vform, dst, temp1, temp2);
3524   return dst;
3525 }
3526 
3527 
saddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3528 LogicVRegister Simulator::saddw(VectorFormat vform,
3529                                 LogicVRegister dst,
3530                                 const LogicVRegister& src1,
3531                                 const LogicVRegister& src2) {
3532   SimVRegister temp;
3533   sxtl(vform, temp, src2);
3534   add(vform, dst, src1, temp);
3535   return dst;
3536 }
3537 
3538 
saddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3539 LogicVRegister Simulator::saddw2(VectorFormat vform,
3540                                  LogicVRegister dst,
3541                                  const LogicVRegister& src1,
3542                                  const LogicVRegister& src2) {
3543   SimVRegister temp;
3544   sxtl2(vform, temp, src2);
3545   add(vform, dst, src1, temp);
3546   return dst;
3547 }
3548 
3549 
usubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3550 LogicVRegister Simulator::usubl(VectorFormat vform,
3551                                 LogicVRegister dst,
3552                                 const LogicVRegister& src1,
3553                                 const LogicVRegister& src2) {
3554   SimVRegister temp1, temp2;
3555   uxtl(vform, temp1, src1);
3556   uxtl(vform, temp2, src2);
3557   sub(vform, dst, temp1, temp2);
3558   return dst;
3559 }
3560 
3561 
usubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3562 LogicVRegister Simulator::usubl2(VectorFormat vform,
3563                                  LogicVRegister dst,
3564                                  const LogicVRegister& src1,
3565                                  const LogicVRegister& src2) {
3566   SimVRegister temp1, temp2;
3567   uxtl2(vform, temp1, src1);
3568   uxtl2(vform, temp2, src2);
3569   sub(vform, dst, temp1, temp2);
3570   return dst;
3571 }
3572 
3573 
usubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3574 LogicVRegister Simulator::usubw(VectorFormat vform,
3575                                 LogicVRegister dst,
3576                                 const LogicVRegister& src1,
3577                                 const LogicVRegister& src2) {
3578   SimVRegister temp;
3579   uxtl(vform, temp, src2);
3580   sub(vform, dst, src1, temp);
3581   return dst;
3582 }
3583 
3584 
usubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3585 LogicVRegister Simulator::usubw2(VectorFormat vform,
3586                                  LogicVRegister dst,
3587                                  const LogicVRegister& src1,
3588                                  const LogicVRegister& src2) {
3589   SimVRegister temp;
3590   uxtl2(vform, temp, src2);
3591   sub(vform, dst, src1, temp);
3592   return dst;
3593 }
3594 
3595 
ssubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3596 LogicVRegister Simulator::ssubl(VectorFormat vform,
3597                                 LogicVRegister dst,
3598                                 const LogicVRegister& src1,
3599                                 const LogicVRegister& src2) {
3600   SimVRegister temp1, temp2;
3601   sxtl(vform, temp1, src1);
3602   sxtl(vform, temp2, src2);
3603   sub(vform, dst, temp1, temp2);
3604   return dst;
3605 }
3606 
3607 
ssubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3608 LogicVRegister Simulator::ssubl2(VectorFormat vform,
3609                                  LogicVRegister dst,
3610                                  const LogicVRegister& src1,
3611                                  const LogicVRegister& src2) {
3612   SimVRegister temp1, temp2;
3613   sxtl2(vform, temp1, src1);
3614   sxtl2(vform, temp2, src2);
3615   sub(vform, dst, temp1, temp2);
3616   return dst;
3617 }
3618 
3619 
ssubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3620 LogicVRegister Simulator::ssubw(VectorFormat vform,
3621                                 LogicVRegister dst,
3622                                 const LogicVRegister& src1,
3623                                 const LogicVRegister& src2) {
3624   SimVRegister temp;
3625   sxtl(vform, temp, src2);
3626   sub(vform, dst, src1, temp);
3627   return dst;
3628 }
3629 
3630 
ssubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3631 LogicVRegister Simulator::ssubw2(VectorFormat vform,
3632                                  LogicVRegister dst,
3633                                  const LogicVRegister& src1,
3634                                  const LogicVRegister& src2) {
3635   SimVRegister temp;
3636   sxtl2(vform, temp, src2);
3637   sub(vform, dst, src1, temp);
3638   return dst;
3639 }
3640 
3641 
uabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3642 LogicVRegister Simulator::uabal(VectorFormat vform,
3643                                 LogicVRegister dst,
3644                                 const LogicVRegister& src1,
3645                                 const LogicVRegister& src2) {
3646   SimVRegister temp1, temp2;
3647   uxtl(vform, temp1, src1);
3648   uxtl(vform, temp2, src2);
3649   uaba(vform, dst, temp1, temp2);
3650   return dst;
3651 }
3652 
3653 
uabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3654 LogicVRegister Simulator::uabal2(VectorFormat vform,
3655                                  LogicVRegister dst,
3656                                  const LogicVRegister& src1,
3657                                  const LogicVRegister& src2) {
3658   SimVRegister temp1, temp2;
3659   uxtl2(vform, temp1, src1);
3660   uxtl2(vform, temp2, src2);
3661   uaba(vform, dst, temp1, temp2);
3662   return dst;
3663 }
3664 
3665 
sabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3666 LogicVRegister Simulator::sabal(VectorFormat vform,
3667                                 LogicVRegister dst,
3668                                 const LogicVRegister& src1,
3669                                 const LogicVRegister& src2) {
3670   SimVRegister temp1, temp2;
3671   sxtl(vform, temp1, src1);
3672   sxtl(vform, temp2, src2);
3673   saba(vform, dst, temp1, temp2);
3674   return dst;
3675 }
3676 
3677 
sabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3678 LogicVRegister Simulator::sabal2(VectorFormat vform,
3679                                  LogicVRegister dst,
3680                                  const LogicVRegister& src1,
3681                                  const LogicVRegister& src2) {
3682   SimVRegister temp1, temp2;
3683   sxtl2(vform, temp1, src1);
3684   sxtl2(vform, temp2, src2);
3685   saba(vform, dst, temp1, temp2);
3686   return dst;
3687 }
3688 
3689 
uabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3690 LogicVRegister Simulator::uabdl(VectorFormat vform,
3691                                 LogicVRegister dst,
3692                                 const LogicVRegister& src1,
3693                                 const LogicVRegister& src2) {
3694   SimVRegister temp1, temp2;
3695   uxtl(vform, temp1, src1);
3696   uxtl(vform, temp2, src2);
3697   absdiff(vform, dst, temp1, temp2, false);
3698   return dst;
3699 }
3700 
3701 
uabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3702 LogicVRegister Simulator::uabdl2(VectorFormat vform,
3703                                  LogicVRegister dst,
3704                                  const LogicVRegister& src1,
3705                                  const LogicVRegister& src2) {
3706   SimVRegister temp1, temp2;
3707   uxtl2(vform, temp1, src1);
3708   uxtl2(vform, temp2, src2);
3709   absdiff(vform, dst, temp1, temp2, false);
3710   return dst;
3711 }
3712 
3713 
sabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3714 LogicVRegister Simulator::sabdl(VectorFormat vform,
3715                                 LogicVRegister dst,
3716                                 const LogicVRegister& src1,
3717                                 const LogicVRegister& src2) {
3718   SimVRegister temp1, temp2;
3719   sxtl(vform, temp1, src1);
3720   sxtl(vform, temp2, src2);
3721   absdiff(vform, dst, temp1, temp2, true);
3722   return dst;
3723 }
3724 
3725 
sabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3726 LogicVRegister Simulator::sabdl2(VectorFormat vform,
3727                                  LogicVRegister dst,
3728                                  const LogicVRegister& src1,
3729                                  const LogicVRegister& src2) {
3730   SimVRegister temp1, temp2;
3731   sxtl2(vform, temp1, src1);
3732   sxtl2(vform, temp2, src2);
3733   absdiff(vform, dst, temp1, temp2, true);
3734   return dst;
3735 }
3736 
3737 
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3738 LogicVRegister Simulator::umull(VectorFormat vform,
3739                                 LogicVRegister dst,
3740                                 const LogicVRegister& src1,
3741                                 const LogicVRegister& src2) {
3742   SimVRegister temp1, temp2;
3743   uxtl(vform, temp1, src1);
3744   uxtl(vform, temp2, src2);
3745   mul(vform, dst, temp1, temp2);
3746   return dst;
3747 }
3748 
3749 
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3750 LogicVRegister Simulator::umull2(VectorFormat vform,
3751                                  LogicVRegister dst,
3752                                  const LogicVRegister& src1,
3753                                  const LogicVRegister& src2) {
3754   SimVRegister temp1, temp2;
3755   uxtl2(vform, temp1, src1);
3756   uxtl2(vform, temp2, src2);
3757   mul(vform, dst, temp1, temp2);
3758   return dst;
3759 }
3760 
3761 
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3762 LogicVRegister Simulator::smull(VectorFormat vform,
3763                                 LogicVRegister dst,
3764                                 const LogicVRegister& src1,
3765                                 const LogicVRegister& src2) {
3766   SimVRegister temp1, temp2;
3767   sxtl(vform, temp1, src1);
3768   sxtl(vform, temp2, src2);
3769   mul(vform, dst, temp1, temp2);
3770   return dst;
3771 }
3772 
3773 
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3774 LogicVRegister Simulator::smull2(VectorFormat vform,
3775                                  LogicVRegister dst,
3776                                  const LogicVRegister& src1,
3777                                  const LogicVRegister& src2) {
3778   SimVRegister temp1, temp2;
3779   sxtl2(vform, temp1, src1);
3780   sxtl2(vform, temp2, src2);
3781   mul(vform, dst, temp1, temp2);
3782   return dst;
3783 }
3784 
3785 
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3786 LogicVRegister Simulator::umlsl(VectorFormat vform,
3787                                 LogicVRegister dst,
3788                                 const LogicVRegister& src1,
3789                                 const LogicVRegister& src2) {
3790   SimVRegister temp1, temp2;
3791   uxtl(vform, temp1, src1);
3792   uxtl(vform, temp2, src2);
3793   mls(vform, dst, dst, temp1, temp2);
3794   return dst;
3795 }
3796 
3797 
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3798 LogicVRegister Simulator::umlsl2(VectorFormat vform,
3799                                  LogicVRegister dst,
3800                                  const LogicVRegister& src1,
3801                                  const LogicVRegister& src2) {
3802   SimVRegister temp1, temp2;
3803   uxtl2(vform, temp1, src1);
3804   uxtl2(vform, temp2, src2);
3805   mls(vform, dst, dst, temp1, temp2);
3806   return dst;
3807 }
3808 
3809 
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3810 LogicVRegister Simulator::smlsl(VectorFormat vform,
3811                                 LogicVRegister dst,
3812                                 const LogicVRegister& src1,
3813                                 const LogicVRegister& src2) {
3814   SimVRegister temp1, temp2;
3815   sxtl(vform, temp1, src1);
3816   sxtl(vform, temp2, src2);
3817   mls(vform, dst, dst, temp1, temp2);
3818   return dst;
3819 }
3820 
3821 
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3822 LogicVRegister Simulator::smlsl2(VectorFormat vform,
3823                                  LogicVRegister dst,
3824                                  const LogicVRegister& src1,
3825                                  const LogicVRegister& src2) {
3826   SimVRegister temp1, temp2;
3827   sxtl2(vform, temp1, src1);
3828   sxtl2(vform, temp2, src2);
3829   mls(vform, dst, dst, temp1, temp2);
3830   return dst;
3831 }
3832 
3833 
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3834 LogicVRegister Simulator::umlal(VectorFormat vform,
3835                                 LogicVRegister dst,
3836                                 const LogicVRegister& src1,
3837                                 const LogicVRegister& src2) {
3838   SimVRegister temp1, temp2;
3839   uxtl(vform, temp1, src1);
3840   uxtl(vform, temp2, src2);
3841   mla(vform, dst, dst, temp1, temp2);
3842   return dst;
3843 }
3844 
3845 
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3846 LogicVRegister Simulator::umlal2(VectorFormat vform,
3847                                  LogicVRegister dst,
3848                                  const LogicVRegister& src1,
3849                                  const LogicVRegister& src2) {
3850   SimVRegister temp1, temp2;
3851   uxtl2(vform, temp1, src1);
3852   uxtl2(vform, temp2, src2);
3853   mla(vform, dst, dst, temp1, temp2);
3854   return dst;
3855 }
3856 
3857 
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3858 LogicVRegister Simulator::smlal(VectorFormat vform,
3859                                 LogicVRegister dst,
3860                                 const LogicVRegister& src1,
3861                                 const LogicVRegister& src2) {
3862   SimVRegister temp1, temp2;
3863   sxtl(vform, temp1, src1);
3864   sxtl(vform, temp2, src2);
3865   mla(vform, dst, dst, temp1, temp2);
3866   return dst;
3867 }
3868 
3869 
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3870 LogicVRegister Simulator::smlal2(VectorFormat vform,
3871                                  LogicVRegister dst,
3872                                  const LogicVRegister& src1,
3873                                  const LogicVRegister& src2) {
3874   SimVRegister temp1, temp2;
3875   sxtl2(vform, temp1, src1);
3876   sxtl2(vform, temp2, src2);
3877   mla(vform, dst, dst, temp1, temp2);
3878   return dst;
3879 }
3880 
3881 
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3882 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
3883                                   LogicVRegister dst,
3884                                   const LogicVRegister& src1,
3885                                   const LogicVRegister& src2) {
3886   SimVRegister temp;
3887   LogicVRegister product = sqdmull(vform, temp, src1, src2);
3888   return add(vform, dst, dst, product).SignedSaturate(vform);
3889 }
3890 
3891 
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3892 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
3893                                    LogicVRegister dst,
3894                                    const LogicVRegister& src1,
3895                                    const LogicVRegister& src2) {
3896   SimVRegister temp;
3897   LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3898   return add(vform, dst, dst, product).SignedSaturate(vform);
3899 }
3900 
3901 
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3902 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
3903                                   LogicVRegister dst,
3904                                   const LogicVRegister& src1,
3905                                   const LogicVRegister& src2) {
3906   SimVRegister temp;
3907   LogicVRegister product = sqdmull(vform, temp, src1, src2);
3908   return sub(vform, dst, dst, product).SignedSaturate(vform);
3909 }
3910 
3911 
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3912 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
3913                                    LogicVRegister dst,
3914                                    const LogicVRegister& src1,
3915                                    const LogicVRegister& src2) {
3916   SimVRegister temp;
3917   LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3918   return sub(vform, dst, dst, product).SignedSaturate(vform);
3919 }
3920 
3921 
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3922 LogicVRegister Simulator::sqdmull(VectorFormat vform,
3923                                   LogicVRegister dst,
3924                                   const LogicVRegister& src1,
3925                                   const LogicVRegister& src2) {
3926   SimVRegister temp;
3927   LogicVRegister product = smull(vform, temp, src1, src2);
3928   return add(vform, dst, product, product).SignedSaturate(vform);
3929 }
3930 
3931 
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3932 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
3933                                    LogicVRegister dst,
3934                                    const LogicVRegister& src1,
3935                                    const LogicVRegister& src2) {
3936   SimVRegister temp;
3937   LogicVRegister product = smull2(vform, temp, src1, src2);
3938   return add(vform, dst, product, product).SignedSaturate(vform);
3939 }
3940 
3941 
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)3942 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
3943                                    LogicVRegister dst,
3944                                    const LogicVRegister& src1,
3945                                    const LogicVRegister& src2,
3946                                    bool round) {
3947   // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
3948   // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
3949   // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
3950 
3951   int esize = LaneSizeInBitsFromFormat(vform);
3952   int round_const = round ? (1 << (esize - 2)) : 0;
3953   int64_t product;
3954 
3955   dst.ClearForWrite(vform);
3956   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3957     product = src1.Int(vform, i) * src2.Int(vform, i);
3958     product += round_const;
3959     product = product >> (esize - 1);
3960 
3961     if (product > MaxIntFromFormat(vform)) {
3962       product = MaxIntFromFormat(vform);
3963     } else if (product < MinIntFromFormat(vform)) {
3964       product = MinIntFromFormat(vform);
3965     }
3966     dst.SetInt(vform, i, product);
3967   }
3968   return dst;
3969 }
3970 
3971 
dot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_signed)3972 LogicVRegister Simulator::dot(VectorFormat vform,
3973                               LogicVRegister dst,
3974                               const LogicVRegister& src1,
3975                               const LogicVRegister& src2,
3976                               bool is_signed) {
3977   VectorFormat quarter_vform =
3978       VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
3979 
3980   dst.ClearForWrite(vform);
3981   for (int e = 0; e < LaneCountFromFormat(vform); e++) {
3982     uint64_t result = 0;
3983     int64_t element1, element2;
3984     for (int i = 0; i < 4; i++) {
3985       int index = 4 * e + i;
3986       if (is_signed) {
3987         element1 = src1.Int(quarter_vform, index);
3988         element2 = src2.Int(quarter_vform, index);
3989       } else {
3990         element1 = src1.Uint(quarter_vform, index);
3991         element2 = src2.Uint(quarter_vform, index);
3992       }
3993       result += element1 * element2;
3994     }
3995     dst.SetUint(vform, e, result + dst.Uint(vform, e));
3996   }
3997   return dst;
3998 }
3999 
4000 
sdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4001 LogicVRegister Simulator::sdot(VectorFormat vform,
4002                                LogicVRegister dst,
4003                                const LogicVRegister& src1,
4004                                const LogicVRegister& src2) {
4005   return dot(vform, dst, src1, src2, true);
4006 }
4007 
4008 
udot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4009 LogicVRegister Simulator::udot(VectorFormat vform,
4010                                LogicVRegister dst,
4011                                const LogicVRegister& src1,
4012                                const LogicVRegister& src2) {
4013   return dot(vform, dst, src1, src2, false);
4014 }
4015 
4016 
sqrdmlash(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round,bool sub_op)4017 LogicVRegister Simulator::sqrdmlash(VectorFormat vform,
4018                                     LogicVRegister dst,
4019                                     const LogicVRegister& src1,
4020                                     const LogicVRegister& src2,
4021                                     bool round,
4022                                     bool sub_op) {
4023   // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
4024   // To avoid this, we use:
4025   //     (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
4026   // which is same as:
4027   //     (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
4028 
4029   int esize = LaneSizeInBitsFromFormat(vform);
4030   int round_const = round ? (1 << (esize - 2)) : 0;
4031   int64_t accum;
4032 
4033   dst.ClearForWrite(vform);
4034   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4035     accum = dst.Int(vform, i) << (esize - 1);
4036     if (sub_op) {
4037       accum -= src1.Int(vform, i) * src2.Int(vform, i);
4038     } else {
4039       accum += src1.Int(vform, i) * src2.Int(vform, i);
4040     }
4041     accum += round_const;
4042     accum = accum >> (esize - 1);
4043 
4044     if (accum > MaxIntFromFormat(vform)) {
4045       accum = MaxIntFromFormat(vform);
4046     } else if (accum < MinIntFromFormat(vform)) {
4047       accum = MinIntFromFormat(vform);
4048     }
4049     dst.SetInt(vform, i, accum);
4050   }
4051   return dst;
4052 }
4053 
4054 
sqrdmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)4055 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
4056                                    LogicVRegister dst,
4057                                    const LogicVRegister& src1,
4058                                    const LogicVRegister& src2,
4059                                    bool round) {
4060   return sqrdmlash(vform, dst, src1, src2, round, false);
4061 }
4062 
4063 
sqrdmlsh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)4064 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
4065                                    LogicVRegister dst,
4066                                    const LogicVRegister& src1,
4067                                    const LogicVRegister& src2,
4068                                    bool round) {
4069   return sqrdmlash(vform, dst, src1, src2, round, true);
4070 }
4071 
4072 
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4073 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
4074                                   LogicVRegister dst,
4075                                   const LogicVRegister& src1,
4076                                   const LogicVRegister& src2) {
4077   return sqrdmulh(vform, dst, src1, src2, false);
4078 }
4079 
4080 
addhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4081 LogicVRegister Simulator::addhn(VectorFormat vform,
4082                                 LogicVRegister dst,
4083                                 const LogicVRegister& src1,
4084                                 const LogicVRegister& src2) {
4085   SimVRegister temp;
4086   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4087   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4088   return dst;
4089 }
4090 
4091 
addhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4092 LogicVRegister Simulator::addhn2(VectorFormat vform,
4093                                  LogicVRegister dst,
4094                                  const LogicVRegister& src1,
4095                                  const LogicVRegister& src2) {
4096   SimVRegister temp;
4097   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4098   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4099   return dst;
4100 }
4101 
4102 
raddhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4103 LogicVRegister Simulator::raddhn(VectorFormat vform,
4104                                  LogicVRegister dst,
4105                                  const LogicVRegister& src1,
4106                                  const LogicVRegister& src2) {
4107   SimVRegister temp;
4108   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4109   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4110   return dst;
4111 }
4112 
4113 
raddhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4114 LogicVRegister Simulator::raddhn2(VectorFormat vform,
4115                                   LogicVRegister dst,
4116                                   const LogicVRegister& src1,
4117                                   const LogicVRegister& src2) {
4118   SimVRegister temp;
4119   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4120   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4121   return dst;
4122 }
4123 
4124 
subhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4125 LogicVRegister Simulator::subhn(VectorFormat vform,
4126                                 LogicVRegister dst,
4127                                 const LogicVRegister& src1,
4128                                 const LogicVRegister& src2) {
4129   SimVRegister temp;
4130   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4131   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4132   return dst;
4133 }
4134 
4135 
subhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4136 LogicVRegister Simulator::subhn2(VectorFormat vform,
4137                                  LogicVRegister dst,
4138                                  const LogicVRegister& src1,
4139                                  const LogicVRegister& src2) {
4140   SimVRegister temp;
4141   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4142   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4143   return dst;
4144 }
4145 
4146 
rsubhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4147 LogicVRegister Simulator::rsubhn(VectorFormat vform,
4148                                  LogicVRegister dst,
4149                                  const LogicVRegister& src1,
4150                                  const LogicVRegister& src2) {
4151   SimVRegister temp;
4152   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4153   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4154   return dst;
4155 }
4156 
4157 
rsubhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4158 LogicVRegister Simulator::rsubhn2(VectorFormat vform,
4159                                   LogicVRegister dst,
4160                                   const LogicVRegister& src1,
4161                                   const LogicVRegister& src2) {
4162   SimVRegister temp;
4163   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4164   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4165   return dst;
4166 }
4167 
4168 
trn1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4169 LogicVRegister Simulator::trn1(VectorFormat vform,
4170                                LogicVRegister dst,
4171                                const LogicVRegister& src1,
4172                                const LogicVRegister& src2) {
4173   uint64_t result[kZRegMaxSizeInBytes];
4174   int lane_count = LaneCountFromFormat(vform);
4175   int pairs = lane_count / 2;
4176   for (int i = 0; i < pairs; ++i) {
4177     result[2 * i] = src1.Uint(vform, 2 * i);
4178     result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
4179   }
4180 
4181   dst.ClearForWrite(vform);
4182   for (int i = 0; i < lane_count; ++i) {
4183     dst.SetUint(vform, i, result[i]);
4184   }
4185   return dst;
4186 }
4187 
4188 
trn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4189 LogicVRegister Simulator::trn2(VectorFormat vform,
4190                                LogicVRegister dst,
4191                                const LogicVRegister& src1,
4192                                const LogicVRegister& src2) {
4193   uint64_t result[kZRegMaxSizeInBytes];
4194   int lane_count = LaneCountFromFormat(vform);
4195   int pairs = lane_count / 2;
4196   for (int i = 0; i < pairs; ++i) {
4197     result[2 * i] = src1.Uint(vform, (2 * i) + 1);
4198     result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
4199   }
4200 
4201   dst.ClearForWrite(vform);
4202   for (int i = 0; i < lane_count; ++i) {
4203     dst.SetUint(vform, i, result[i]);
4204   }
4205   return dst;
4206 }
4207 
4208 
zip1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4209 LogicVRegister Simulator::zip1(VectorFormat vform,
4210                                LogicVRegister dst,
4211                                const LogicVRegister& src1,
4212                                const LogicVRegister& src2) {
4213   uint64_t result[kZRegMaxSizeInBytes];
4214   int lane_count = LaneCountFromFormat(vform);
4215   int pairs = lane_count / 2;
4216   for (int i = 0; i < pairs; ++i) {
4217     result[2 * i] = src1.Uint(vform, i);
4218     result[(2 * i) + 1] = src2.Uint(vform, i);
4219   }
4220 
4221   dst.ClearForWrite(vform);
4222   for (int i = 0; i < lane_count; ++i) {
4223     dst.SetUint(vform, i, result[i]);
4224   }
4225   return dst;
4226 }
4227 
4228 
zip2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4229 LogicVRegister Simulator::zip2(VectorFormat vform,
4230                                LogicVRegister dst,
4231                                const LogicVRegister& src1,
4232                                const LogicVRegister& src2) {
4233   uint64_t result[kZRegMaxSizeInBytes];
4234   int lane_count = LaneCountFromFormat(vform);
4235   int pairs = lane_count / 2;
4236   for (int i = 0; i < pairs; ++i) {
4237     result[2 * i] = src1.Uint(vform, pairs + i);
4238     result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
4239   }
4240 
4241   dst.ClearForWrite(vform);
4242   for (int i = 0; i < lane_count; ++i) {
4243     dst.SetUint(vform, i, result[i]);
4244   }
4245   return dst;
4246 }
4247 
4248 
uzp1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4249 LogicVRegister Simulator::uzp1(VectorFormat vform,
4250                                LogicVRegister dst,
4251                                const LogicVRegister& src1,
4252                                const LogicVRegister& src2) {
4253   uint64_t result[kZRegMaxSizeInBytes * 2];
4254   int lane_count = LaneCountFromFormat(vform);
4255   for (int i = 0; i < lane_count; ++i) {
4256     result[i] = src1.Uint(vform, i);
4257     result[lane_count + i] = src2.Uint(vform, i);
4258   }
4259 
4260   dst.ClearForWrite(vform);
4261   for (int i = 0; i < lane_count; ++i) {
4262     dst.SetUint(vform, i, result[2 * i]);
4263   }
4264   return dst;
4265 }
4266 
4267 
uzp2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4268 LogicVRegister Simulator::uzp2(VectorFormat vform,
4269                                LogicVRegister dst,
4270                                const LogicVRegister& src1,
4271                                const LogicVRegister& src2) {
4272   uint64_t result[kZRegMaxSizeInBytes * 2];
4273   int lane_count = LaneCountFromFormat(vform);
4274   for (int i = 0; i < lane_count; ++i) {
4275     result[i] = src1.Uint(vform, i);
4276     result[lane_count + i] = src2.Uint(vform, i);
4277   }
4278 
4279   dst.ClearForWrite(vform);
4280   for (int i = 0; i < lane_count; ++i) {
4281     dst.SetUint(vform, i, result[(2 * i) + 1]);
4282   }
4283   return dst;
4284 }
4285 
4286 
4287 template <typename T>
FPNeg(T op)4288 T Simulator::FPNeg(T op) {
4289   return -op;
4290 }
4291 
4292 template <typename T>
FPAdd(T op1,T op2)4293 T Simulator::FPAdd(T op1, T op2) {
4294   T result = FPProcessNaNs(op1, op2);
4295   if (IsNaN(result)) {
4296     return result;
4297   }
4298 
4299   if (IsInf(op1) && IsInf(op2) && (op1 != op2)) {
4300     // inf + -inf returns the default NaN.
4301     FPProcessException();
4302     return FPDefaultNaN<T>();
4303   } else {
4304     // Other cases should be handled by standard arithmetic.
4305     return op1 + op2;
4306   }
4307 }
4308 
4309 
4310 template <typename T>
FPSub(T op1,T op2)4311 T Simulator::FPSub(T op1, T op2) {
4312   // NaNs should be handled elsewhere.
4313   VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4314 
4315   if (IsInf(op1) && IsInf(op2) && (op1 == op2)) {
4316     // inf - inf returns the default NaN.
4317     FPProcessException();
4318     return FPDefaultNaN<T>();
4319   } else {
4320     // Other cases should be handled by standard arithmetic.
4321     return op1 - op2;
4322   }
4323 }
4324 
4325 
4326 template <typename T>
FPMul(T op1,T op2)4327 T Simulator::FPMul(T op1, T op2) {
4328   // NaNs should be handled elsewhere.
4329   VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4330 
4331   if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4332     // inf * 0.0 returns the default NaN.
4333     FPProcessException();
4334     return FPDefaultNaN<T>();
4335   } else {
4336     // Other cases should be handled by standard arithmetic.
4337     return op1 * op2;
4338   }
4339 }
4340 
4341 
4342 template <typename T>
FPMulx(T op1,T op2)4343 T Simulator::FPMulx(T op1, T op2) {
4344   if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4345     // inf * 0.0 returns +/-2.0.
4346     T two = 2.0;
4347     return copysign(1.0, op1) * copysign(1.0, op2) * two;
4348   }
4349   return FPMul(op1, op2);
4350 }
4351 
4352 
4353 template <typename T>
FPMulAdd(T a,T op1,T op2)4354 T Simulator::FPMulAdd(T a, T op1, T op2) {
4355   T result = FPProcessNaNs3(a, op1, op2);
4356 
4357   T sign_a = copysign(1.0, a);
4358   T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
4359   bool isinf_prod = IsInf(op1) || IsInf(op2);
4360   bool operation_generates_nan =
4361       (IsInf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
4362       (IsInf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
4363       (IsInf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf
4364 
4365   if (IsNaN(result)) {
4366     // Generated NaNs override quiet NaNs propagated from a.
4367     if (operation_generates_nan && IsQuietNaN(a)) {
4368       FPProcessException();
4369       return FPDefaultNaN<T>();
4370     } else {
4371       return result;
4372     }
4373   }
4374 
4375   // If the operation would produce a NaN, return the default NaN.
4376   if (operation_generates_nan) {
4377     FPProcessException();
4378     return FPDefaultNaN<T>();
4379   }
4380 
4381   // Work around broken fma implementations for exact zero results: The sign of
4382   // exact 0.0 results is positive unless both a and op1 * op2 are negative.
4383   if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
4384     return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? -0.0 : 0.0;
4385   }
4386 
4387   result = FusedMultiplyAdd(op1, op2, a);
4388   VIXL_ASSERT(!IsNaN(result));
4389 
4390   // Work around broken fma implementations for rounded zero results: If a is
4391   // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
4392   if ((a == 0.0) && (result == 0.0)) {
4393     return copysign(0.0, sign_prod);
4394   }
4395 
4396   return result;
4397 }
4398 
4399 
4400 template <typename T>
FPDiv(T op1,T op2)4401 T Simulator::FPDiv(T op1, T op2) {
4402   // NaNs should be handled elsewhere.
4403   VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4404 
4405   if ((IsInf(op1) && IsInf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
4406     // inf / inf and 0.0 / 0.0 return the default NaN.
4407     FPProcessException();
4408     return FPDefaultNaN<T>();
4409   } else {
4410     if (op2 == 0.0) {
4411       FPProcessException();
4412       if (!IsNaN(op1)) {
4413         double op1_sign = copysign(1.0, op1);
4414         double op2_sign = copysign(1.0, op2);
4415         return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
4416       }
4417     }
4418 
4419     // Other cases should be handled by standard arithmetic.
4420     return op1 / op2;
4421   }
4422 }
4423 
4424 
4425 template <typename T>
FPSqrt(T op)4426 T Simulator::FPSqrt(T op) {
4427   if (IsNaN(op)) {
4428     return FPProcessNaN(op);
4429   } else if (op < T(0.0)) {
4430     FPProcessException();
4431     return FPDefaultNaN<T>();
4432   } else {
4433     return sqrt(op);
4434   }
4435 }
4436 
4437 
4438 template <typename T>
FPMax(T a,T b)4439 T Simulator::FPMax(T a, T b) {
4440   T result = FPProcessNaNs(a, b);
4441   if (IsNaN(result)) return result;
4442 
4443   if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4444     // a and b are zero, and the sign differs: return +0.0.
4445     return 0.0;
4446   } else {
4447     return (a > b) ? a : b;
4448   }
4449 }
4450 
4451 
4452 template <typename T>
FPMaxNM(T a,T b)4453 T Simulator::FPMaxNM(T a, T b) {
4454   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4455     a = kFP64NegativeInfinity;
4456   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4457     b = kFP64NegativeInfinity;
4458   }
4459 
4460   T result = FPProcessNaNs(a, b);
4461   return IsNaN(result) ? result : FPMax(a, b);
4462 }
4463 
4464 
4465 template <typename T>
FPMin(T a,T b)4466 T Simulator::FPMin(T a, T b) {
4467   T result = FPProcessNaNs(a, b);
4468   if (IsNaN(result)) return result;
4469 
4470   if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4471     // a and b are zero, and the sign differs: return -0.0.
4472     return -0.0;
4473   } else {
4474     return (a < b) ? a : b;
4475   }
4476 }
4477 
4478 
4479 template <typename T>
FPMinNM(T a,T b)4480 T Simulator::FPMinNM(T a, T b) {
4481   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4482     a = kFP64PositiveInfinity;
4483   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4484     b = kFP64PositiveInfinity;
4485   }
4486 
4487   T result = FPProcessNaNs(a, b);
4488   return IsNaN(result) ? result : FPMin(a, b);
4489 }
4490 
4491 
4492 template <typename T>
FPRecipStepFused(T op1,T op2)4493 T Simulator::FPRecipStepFused(T op1, T op2) {
4494   const T two = 2.0;
4495   if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4496     return two;
4497   } else if (IsInf(op1) || IsInf(op2)) {
4498     // Return +inf if signs match, otherwise -inf.
4499     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4500                                           : kFP64NegativeInfinity;
4501   } else {
4502     return FusedMultiplyAdd(op1, op2, two);
4503   }
4504 }
4505 
// Returns true if `value` is a normal floating-point number, i.e. it is not
// zero, subnormal, infinite or NaN.
template <typename T>
bool IsNormal(T value) {
  return std::fpclassify(value) == FP_NORMAL;
}
4510 
4511 template <>
IsNormal(SimFloat16 value)4512 bool IsNormal(SimFloat16 value) {
4513   uint16_t rawbits = Float16ToRawbits(value);
4514   uint16_t exp_mask = 0x7c00;
4515   // Check that the exponent is neither all zeroes or all ones.
4516   return ((rawbits & exp_mask) != 0) && ((~rawbits & exp_mask) != 0);
4517 }
4518 
4519 
4520 template <typename T>
FPRSqrtStepFused(T op1,T op2)4521 T Simulator::FPRSqrtStepFused(T op1, T op2) {
4522   const T one_point_five = 1.5;
4523   const T two = 2.0;
4524 
4525   if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4526     return one_point_five;
4527   } else if (IsInf(op1) || IsInf(op2)) {
4528     // Return +inf if signs match, otherwise -inf.
4529     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4530                                           : kFP64NegativeInfinity;
4531   } else {
4532     // The multiply-add-halve operation must be fully fused, so avoid interim
4533     // rounding by checking which operand can be losslessly divided by two
4534     // before doing the multiply-add.
4535     if (IsNormal(op1 / two)) {
4536       return FusedMultiplyAdd(op1 / two, op2, one_point_five);
4537     } else if (IsNormal(op2 / two)) {
4538       return FusedMultiplyAdd(op1, op2 / two, one_point_five);
4539     } else {
4540       // Neither operand is normal after halving: the result is dominated by
4541       // the addition term, so just return that.
4542       return one_point_five;
4543     }
4544   }
4545 }
4546 
FPToFixedJS(double value)4547 int32_t Simulator::FPToFixedJS(double value) {
4548   // The Z-flag is set when the conversion from double precision floating-point
4549   // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
4550   // outside the bounds of a 32-bit integer, or isn't an exact integer then the
4551   // Z-flag is unset.
4552   int Z = 1;
4553   int32_t result;
4554 
4555   if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4556       (value == kFP64NegativeInfinity)) {
4557     // +/- zero and infinity all return zero, however -0 and +/- Infinity also
4558     // unset the Z-flag.
4559     result = 0.0;
4560     if ((value != 0.0) || std::signbit(value)) {
4561       Z = 0;
4562     }
4563   } else if (std::isnan(value)) {
4564     // NaN values unset the Z-flag and set the result to 0.
4565     FPProcessNaN(value);
4566     result = 0;
4567     Z = 0;
4568   } else {
4569     // All other values are converted to an integer representation, rounded
4570     // toward zero.
4571     double int_result = std::floor(value);
4572     double error = value - int_result;
4573 
4574     if ((error != 0.0) && (int_result < 0.0)) {
4575       int_result++;
4576     }
4577 
4578     // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
4579     // write a one-liner with std::round, but the behaviour on ties is incorrect
4580     // for our purposes.
4581     double mod_const = static_cast<double>(UINT64_C(1) << 32);
4582     double mod_error =
4583         (int_result / mod_const) - std::floor(int_result / mod_const);
4584     double constrained;
4585     if (mod_error == 0.5) {
4586       constrained = INT32_MIN;
4587     } else {
4588       constrained = int_result - mod_const * round(int_result / mod_const);
4589     }
4590 
4591     VIXL_ASSERT(std::floor(constrained) == constrained);
4592     VIXL_ASSERT(constrained >= INT32_MIN);
4593     VIXL_ASSERT(constrained <= INT32_MAX);
4594 
4595     // Take the bottom 32 bits of the result as a 32-bit integer.
4596     result = static_cast<int32_t>(constrained);
4597 
4598     if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
4599         (error != 0.0)) {
4600       // If the integer result is out of range or the conversion isn't exact,
4601       // take exception and unset the Z-flag.
4602       FPProcessException();
4603       Z = 0;
4604     }
4605   }
4606 
4607   ReadNzcv().SetN(0);
4608   ReadNzcv().SetZ(Z);
4609   ReadNzcv().SetC(0);
4610   ReadNzcv().SetV(0);
4611 
4612   return result;
4613 }
4614 
FPRoundIntCommon(double value,FPRounding round_mode)4615 double Simulator::FPRoundIntCommon(double value, FPRounding round_mode) {
4616   VIXL_ASSERT((value != kFP64PositiveInfinity) &&
4617               (value != kFP64NegativeInfinity));
4618   VIXL_ASSERT(!IsNaN(value));
4619 
4620   double int_result = std::floor(value);
4621   double error = value - int_result;
4622   switch (round_mode) {
4623     case FPTieAway: {
4624       // Take care of correctly handling the range ]-0.5, -0.0], which must
4625       // yield -0.0.
4626       if ((-0.5 < value) && (value < 0.0)) {
4627         int_result = -0.0;
4628 
4629       } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
4630         // If the error is greater than 0.5, or is equal to 0.5 and the integer
4631         // result is positive, round up.
4632         int_result++;
4633       }
4634       break;
4635     }
4636     case FPTieEven: {
4637       // Take care of correctly handling the range [-0.5, -0.0], which must
4638       // yield -0.0.
4639       if ((-0.5 <= value) && (value < 0.0)) {
4640         int_result = -0.0;
4641 
4642         // If the error is greater than 0.5, or is equal to 0.5 and the integer
4643         // result is odd, round up.
4644       } else if ((error > 0.5) ||
4645                  ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
4646         int_result++;
4647       }
4648       break;
4649     }
4650     case FPZero: {
4651       // If value>0 then we take floor(value)
4652       // otherwise, ceil(value).
4653       if (value < 0) {
4654         int_result = ceil(value);
4655       }
4656       break;
4657     }
4658     case FPNegativeInfinity: {
4659       // We always use floor(value).
4660       break;
4661     }
4662     case FPPositiveInfinity: {
4663       // Take care of correctly handling the range ]-1.0, -0.0], which must
4664       // yield -0.0.
4665       if ((-1.0 < value) && (value < 0.0)) {
4666         int_result = -0.0;
4667 
4668         // If the error is non-zero, round up.
4669       } else if (error > 0.0) {
4670         int_result++;
4671       }
4672       break;
4673     }
4674     default:
4675       VIXL_UNIMPLEMENTED();
4676   }
4677   return int_result;
4678 }
4679 
FPRoundInt(double value,FPRounding round_mode)4680 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
4681   if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4682       (value == kFP64NegativeInfinity)) {
4683     return value;
4684   } else if (IsNaN(value)) {
4685     return FPProcessNaN(value);
4686   }
4687   return FPRoundIntCommon(value, round_mode);
4688 }
4689 
FPRoundInt(double value,FPRounding round_mode,FrintMode frint_mode)4690 double Simulator::FPRoundInt(double value,
4691                              FPRounding round_mode,
4692                              FrintMode frint_mode) {
4693   if (frint_mode == kFrintToInteger) {
4694     return FPRoundInt(value, round_mode);
4695   }
4696 
4697   VIXL_ASSERT((frint_mode == kFrintToInt32) || (frint_mode == kFrintToInt64));
4698 
4699   if (value == 0.0) {
4700     return value;
4701   }
4702 
4703   if ((value == kFP64PositiveInfinity) || (value == kFP64NegativeInfinity) ||
4704       IsNaN(value)) {
4705     if (frint_mode == kFrintToInt32) {
4706       return INT32_MIN;
4707     } else {
4708       return INT64_MIN;
4709     }
4710   }
4711 
4712   double result = FPRoundIntCommon(value, round_mode);
4713 
4714   // We want to compare `result > INT64_MAX` below, but INT64_MAX isn't exactly
4715   // representable as a double, and is rounded to (INT64_MAX + 1) when
4716   // converted. To avoid this, we compare `result >= int64_max_plus_one`
4717   // instead; this is safe because `result` is known to be integral, and
4718   // `int64_max_plus_one` is exactly representable as a double.
4719   constexpr uint64_t int64_max_plus_one = static_cast<uint64_t>(INT64_MAX) + 1;
4720   VIXL_STATIC_ASSERT(static_cast<uint64_t>(static_cast<double>(
4721                          int64_max_plus_one)) == int64_max_plus_one);
4722 
4723   if (frint_mode == kFrintToInt32) {
4724     if ((result > INT32_MAX) || (result < INT32_MIN)) {
4725       return INT32_MIN;
4726     }
4727   } else if ((result >= int64_max_plus_one) || (result < INT64_MIN)) {
4728     return INT64_MIN;
4729   }
4730 
4731   return result;
4732 }
4733 
FPToInt16(double value,FPRounding rmode)4734 int16_t Simulator::FPToInt16(double value, FPRounding rmode) {
4735   value = FPRoundInt(value, rmode);
4736   if (value >= kHMaxInt) {
4737     return kHMaxInt;
4738   } else if (value < kHMinInt) {
4739     return kHMinInt;
4740   }
4741   return IsNaN(value) ? 0 : static_cast<int16_t>(value);
4742 }
4743 
4744 
FPToInt32(double value,FPRounding rmode)4745 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
4746   value = FPRoundInt(value, rmode);
4747   if (value >= kWMaxInt) {
4748     return kWMaxInt;
4749   } else if (value < kWMinInt) {
4750     return kWMinInt;
4751   }
4752   return IsNaN(value) ? 0 : static_cast<int32_t>(value);
4753 }
4754 
4755 
FPToInt64(double value,FPRounding rmode)4756 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
4757   value = FPRoundInt(value, rmode);
4758   if (value >= kXMaxInt) {
4759     return kXMaxInt;
4760   } else if (value < kXMinInt) {
4761     return kXMinInt;
4762   }
4763   return IsNaN(value) ? 0 : static_cast<int64_t>(value);
4764 }
4765 
4766 
FPToUInt16(double value,FPRounding rmode)4767 uint16_t Simulator::FPToUInt16(double value, FPRounding rmode) {
4768   value = FPRoundInt(value, rmode);
4769   if (value >= kHMaxUInt) {
4770     return kHMaxUInt;
4771   } else if (value < 0.0) {
4772     return 0;
4773   }
4774   return IsNaN(value) ? 0 : static_cast<uint16_t>(value);
4775 }
4776 
4777 
FPToUInt32(double value,FPRounding rmode)4778 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
4779   value = FPRoundInt(value, rmode);
4780   if (value >= kWMaxUInt) {
4781     return kWMaxUInt;
4782   } else if (value < 0.0) {
4783     return 0;
4784   }
4785   return IsNaN(value) ? 0 : static_cast<uint32_t>(value);
4786 }
4787 
4788 
FPToUInt64(double value,FPRounding rmode)4789 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
4790   value = FPRoundInt(value, rmode);
4791   if (value >= kXMaxUInt) {
4792     return kXMaxUInt;
4793   } else if (value < 0.0) {
4794     return 0;
4795   }
4796   return IsNaN(value) ? 0 : static_cast<uint64_t>(value);
4797 }
4798 
4799 
// Defines, for each (FN, OP, PROCNAN) entry expanded by NEON_FP3SAME_LIST, a
// pair of Simulator member functions named FN:
//
//  - A template over the lane type T, which clears `dst` for writing and then
//    computes OP(op1, op2) for each lane of `vform`. When PROCNAN is true,
//    FPProcessNaNs(op1, op2) is evaluated first and OP is only applied if that
//    result is not a NaN; otherwise the NaN result is stored directly.
//  - A non-template overload, which selects the SimFloat16, float or double
//    instantiation according to the lane size of `vform`.
//
// Note: keep comments out of the macro body; a `//` comment on a continued
// line would swallow the lines that follow it.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                \
  template <typename T>                                          \
  LogicVRegister Simulator::FN(VectorFormat vform,               \
                               LogicVRegister dst,               \
                               const LogicVRegister& src1,       \
                               const LogicVRegister& src2) {     \
    dst.ClearForWrite(vform);                                    \
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {       \
      T op1 = src1.Float<T>(i);                                  \
      T op2 = src2.Float<T>(i);                                  \
      T result;                                                  \
      if (PROCNAN) {                                             \
        result = FPProcessNaNs(op1, op2);                        \
        if (!IsNaN(result)) {                                    \
          result = OP(op1, op2);                                 \
        }                                                        \
      } else {                                                   \
        result = OP(op1, op2);                                   \
      }                                                          \
      dst.SetFloat(vform, i, result);                            \
    }                                                            \
    return dst;                                                  \
  }                                                              \
                                                                 \
  LogicVRegister Simulator::FN(VectorFormat vform,               \
                               LogicVRegister dst,               \
                               const LogicVRegister& src1,       \
                               const LogicVRegister& src2) {     \
    if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {          \
      FN<SimFloat16>(vform, dst, src1, src2);                    \
    } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {   \
      FN<float>(vform, dst, src1, src2);                         \
    } else {                                                     \
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \
      FN<double>(vform, dst, src1, src2);                        \
    }                                                            \
    return dst;                                                  \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
4840 
4841 
4842 LogicVRegister Simulator::fnmul(VectorFormat vform,
4843                                 LogicVRegister dst,
4844                                 const LogicVRegister& src1,
4845                                 const LogicVRegister& src2) {
4846   SimVRegister temp;
4847   LogicVRegister product = fmul(vform, temp, src1, src2);
4848   return fneg(vform, dst, product);
4849 }
4850 
4851 
4852 template <typename T>
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4853 LogicVRegister Simulator::frecps(VectorFormat vform,
4854                                  LogicVRegister dst,
4855                                  const LogicVRegister& src1,
4856                                  const LogicVRegister& src2) {
4857   dst.ClearForWrite(vform);
4858   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4859     T op1 = -src1.Float<T>(i);
4860     T op2 = src2.Float<T>(i);
4861     T result = FPProcessNaNs(op1, op2);
4862     dst.SetFloat(vform, i, IsNaN(result) ? result : FPRecipStepFused(op1, op2));
4863   }
4864   return dst;
4865 }
4866 
4867 
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4868 LogicVRegister Simulator::frecps(VectorFormat vform,
4869                                  LogicVRegister dst,
4870                                  const LogicVRegister& src1,
4871                                  const LogicVRegister& src2) {
4872   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4873     frecps<SimFloat16>(vform, dst, src1, src2);
4874   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4875     frecps<float>(vform, dst, src1, src2);
4876   } else {
4877     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4878     frecps<double>(vform, dst, src1, src2);
4879   }
4880   return dst;
4881 }
4882 
4883 
4884 template <typename T>
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4885 LogicVRegister Simulator::frsqrts(VectorFormat vform,
4886                                   LogicVRegister dst,
4887                                   const LogicVRegister& src1,
4888                                   const LogicVRegister& src2) {
4889   dst.ClearForWrite(vform);
4890   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4891     T op1 = -src1.Float<T>(i);
4892     T op2 = src2.Float<T>(i);
4893     T result = FPProcessNaNs(op1, op2);
4894     dst.SetFloat(vform, i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2));
4895   }
4896   return dst;
4897 }
4898 
4899 
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4900 LogicVRegister Simulator::frsqrts(VectorFormat vform,
4901                                   LogicVRegister dst,
4902                                   const LogicVRegister& src1,
4903                                   const LogicVRegister& src2) {
4904   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4905     frsqrts<SimFloat16>(vform, dst, src1, src2);
4906   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4907     frsqrts<float>(vform, dst, src1, src2);
4908   } else {
4909     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4910     frsqrts<double>(vform, dst, src1, src2);
4911   }
4912   return dst;
4913 }
4914 
4915 
4916 template <typename T>
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)4917 LogicVRegister Simulator::fcmp(VectorFormat vform,
4918                                LogicVRegister dst,
4919                                const LogicVRegister& src1,
4920                                const LogicVRegister& src2,
4921                                Condition cond) {
4922   dst.ClearForWrite(vform);
4923   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4924     bool result = false;
4925     T op1 = src1.Float<T>(i);
4926     T op2 = src2.Float<T>(i);
4927     bool unordered = IsNaN(FPProcessNaNs(op1, op2));
4928 
4929     switch (cond) {
4930       case eq:
4931         result = (op1 == op2);
4932         break;
4933       case ge:
4934         result = (op1 >= op2);
4935         break;
4936       case gt:
4937         result = (op1 > op2);
4938         break;
4939       case le:
4940         result = (op1 <= op2);
4941         break;
4942       case lt:
4943         result = (op1 < op2);
4944         break;
4945       case ne:
4946         result = (op1 != op2);
4947         break;
4948       case uo:
4949         result = unordered;
4950         break;
4951       default:
4952         // Other conditions are defined in terms of those above.
4953         VIXL_UNREACHABLE();
4954         break;
4955     }
4956 
4957     if (result && unordered) {
4958       // Only `uo` and `ne` can be true for unordered comparisons.
4959       VIXL_ASSERT((cond == uo) || (cond == ne));
4960     }
4961 
4962     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
4963   }
4964   return dst;
4965 }
4966 
4967 
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)4968 LogicVRegister Simulator::fcmp(VectorFormat vform,
4969                                LogicVRegister dst,
4970                                const LogicVRegister& src1,
4971                                const LogicVRegister& src2,
4972                                Condition cond) {
4973   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4974     fcmp<SimFloat16>(vform, dst, src1, src2, cond);
4975   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4976     fcmp<float>(vform, dst, src1, src2, cond);
4977   } else {
4978     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4979     fcmp<double>(vform, dst, src1, src2, cond);
4980   }
4981   return dst;
4982 }
4983 
4984 
fcmp_zero(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,Condition cond)4985 LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
4986                                     LogicVRegister dst,
4987                                     const LogicVRegister& src,
4988                                     Condition cond) {
4989   SimVRegister temp;
4990   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4991     LogicVRegister zero_reg =
4992         dup_immediate(vform, temp, Float16ToRawbits(SimFloat16(0.0)));
4993     fcmp<SimFloat16>(vform, dst, src, zero_reg, cond);
4994   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4995     LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
4996     fcmp<float>(vform, dst, src, zero_reg, cond);
4997   } else {
4998     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4999     LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0));
5000     fcmp<double>(vform, dst, src, zero_reg, cond);
5001   }
5002   return dst;
5003 }
5004 
5005 
fabscmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5006 LogicVRegister Simulator::fabscmp(VectorFormat vform,
5007                                   LogicVRegister dst,
5008                                   const LogicVRegister& src1,
5009                                   const LogicVRegister& src2,
5010                                   Condition cond) {
5011   SimVRegister temp1, temp2;
5012   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5013     LogicVRegister abs_src1 = fabs_<SimFloat16>(vform, temp1, src1);
5014     LogicVRegister abs_src2 = fabs_<SimFloat16>(vform, temp2, src2);
5015     fcmp<SimFloat16>(vform, dst, abs_src1, abs_src2, cond);
5016   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5017     LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
5018     LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
5019     fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
5020   } else {
5021     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5022     LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
5023     LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
5024     fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
5025   }
5026   return dst;
5027 }
5028 
5029 
5030 template <typename T>
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5031 LogicVRegister Simulator::fmla(VectorFormat vform,
5032                                LogicVRegister dst,
5033                                const LogicVRegister& srca,
5034                                const LogicVRegister& src1,
5035                                const LogicVRegister& src2) {
5036   dst.ClearForWrite(vform);
5037   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5038     T op1 = src1.Float<T>(i);
5039     T op2 = src2.Float<T>(i);
5040     T acc = srca.Float<T>(i);
5041     T result = FPMulAdd(acc, op1, op2);
5042     dst.SetFloat(vform, i, result);
5043   }
5044   return dst;
5045 }
5046 
5047 
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5048 LogicVRegister Simulator::fmla(VectorFormat vform,
5049                                LogicVRegister dst,
5050                                const LogicVRegister& srca,
5051                                const LogicVRegister& src1,
5052                                const LogicVRegister& src2) {
5053   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5054     fmla<SimFloat16>(vform, dst, srca, src1, src2);
5055   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5056     fmla<float>(vform, dst, srca, src1, src2);
5057   } else {
5058     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5059     fmla<double>(vform, dst, srca, src1, src2);
5060   }
5061   return dst;
5062 }
5063 
5064 
5065 template <typename T>
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5066 LogicVRegister Simulator::fmls(VectorFormat vform,
5067                                LogicVRegister dst,
5068                                const LogicVRegister& srca,
5069                                const LogicVRegister& src1,
5070                                const LogicVRegister& src2) {
5071   dst.ClearForWrite(vform);
5072   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5073     T op1 = -src1.Float<T>(i);
5074     T op2 = src2.Float<T>(i);
5075     T acc = srca.Float<T>(i);
5076     T result = FPMulAdd(acc, op1, op2);
5077     dst.SetFloat(i, result);
5078   }
5079   return dst;
5080 }
5081 
5082 
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5083 LogicVRegister Simulator::fmls(VectorFormat vform,
5084                                LogicVRegister dst,
5085                                const LogicVRegister& srca,
5086                                const LogicVRegister& src1,
5087                                const LogicVRegister& src2) {
5088   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5089     fmls<SimFloat16>(vform, dst, srca, src1, src2);
5090   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5091     fmls<float>(vform, dst, srca, src1, src2);
5092   } else {
5093     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5094     fmls<double>(vform, dst, srca, src1, src2);
5095   }
5096   return dst;
5097 }
5098 
5099 
fmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5100 LogicVRegister Simulator::fmlal(VectorFormat vform,
5101                                 LogicVRegister dst,
5102                                 const LogicVRegister& src1,
5103                                 const LogicVRegister& src2) {
5104   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5105   dst.ClearForWrite(vform);
5106   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5107     float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5108     float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5109     float acc = dst.Float<float>(i);
5110     float result = FPMulAdd(acc, op1, op2);
5111     dst.SetFloat(i, result);
5112   }
5113   return dst;
5114 }
5115 
5116 
fmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5117 LogicVRegister Simulator::fmlal2(VectorFormat vform,
5118                                  LogicVRegister dst,
5119                                  const LogicVRegister& src1,
5120                                  const LogicVRegister& src2) {
5121   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5122   dst.ClearForWrite(vform);
5123   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5124     int src = i + LaneCountFromFormat(vform);
5125     float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5126     float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5127     float acc = dst.Float<float>(i);
5128     float result = FPMulAdd(acc, op1, op2);
5129     dst.SetFloat(i, result);
5130   }
5131   return dst;
5132 }
5133 
5134 
fmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5135 LogicVRegister Simulator::fmlsl(VectorFormat vform,
5136                                 LogicVRegister dst,
5137                                 const LogicVRegister& src1,
5138                                 const LogicVRegister& src2) {
5139   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5140   dst.ClearForWrite(vform);
5141   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5142     float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5143     float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5144     float acc = dst.Float<float>(i);
5145     float result = FPMulAdd(acc, op1, op2);
5146     dst.SetFloat(i, result);
5147   }
5148   return dst;
5149 }
5150 
5151 
fmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5152 LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5153                                  LogicVRegister dst,
5154                                  const LogicVRegister& src1,
5155                                  const LogicVRegister& src2) {
5156   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5157   dst.ClearForWrite(vform);
5158   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5159     int src = i + LaneCountFromFormat(vform);
5160     float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5161     float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5162     float acc = dst.Float<float>(i);
5163     float result = FPMulAdd(acc, op1, op2);
5164     dst.SetFloat(i, result);
5165   }
5166   return dst;
5167 }
5168 
5169 
fmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5170 LogicVRegister Simulator::fmlal(VectorFormat vform,
5171                                 LogicVRegister dst,
5172                                 const LogicVRegister& src1,
5173                                 const LogicVRegister& src2,
5174                                 int index) {
5175   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5176   dst.ClearForWrite(vform);
5177   float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5178   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5179     float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5180     float acc = dst.Float<float>(i);
5181     float result = FPMulAdd(acc, op1, op2);
5182     dst.SetFloat(i, result);
5183   }
5184   return dst;
5185 }
5186 
5187 
fmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5188 LogicVRegister Simulator::fmlal2(VectorFormat vform,
5189                                  LogicVRegister dst,
5190                                  const LogicVRegister& src1,
5191                                  const LogicVRegister& src2,
5192                                  int index) {
5193   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5194   dst.ClearForWrite(vform);
5195   float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5196   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5197     int src = i + LaneCountFromFormat(vform);
5198     float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5199     float acc = dst.Float<float>(i);
5200     float result = FPMulAdd(acc, op1, op2);
5201     dst.SetFloat(i, result);
5202   }
5203   return dst;
5204 }
5205 
5206 
fmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5207 LogicVRegister Simulator::fmlsl(VectorFormat vform,
5208                                 LogicVRegister dst,
5209                                 const LogicVRegister& src1,
5210                                 const LogicVRegister& src2,
5211                                 int index) {
5212   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5213   dst.ClearForWrite(vform);
5214   float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5215   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5216     float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5217     float acc = dst.Float<float>(i);
5218     float result = FPMulAdd(acc, op1, op2);
5219     dst.SetFloat(i, result);
5220   }
5221   return dst;
5222 }
5223 
5224 
fmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5225 LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5226                                  LogicVRegister dst,
5227                                  const LogicVRegister& src1,
5228                                  const LogicVRegister& src2,
5229                                  int index) {
5230   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5231   dst.ClearForWrite(vform);
5232   float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5233   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5234     int src = i + LaneCountFromFormat(vform);
5235     float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5236     float acc = dst.Float<float>(i);
5237     float result = FPMulAdd(acc, op1, op2);
5238     dst.SetFloat(i, result);
5239   }
5240   return dst;
5241 }
5242 
5243 
5244 template <typename T>
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5245 LogicVRegister Simulator::fneg(VectorFormat vform,
5246                                LogicVRegister dst,
5247                                const LogicVRegister& src) {
5248   dst.ClearForWrite(vform);
5249   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5250     T op = src.Float<T>(i);
5251     op = -op;
5252     dst.SetFloat(i, op);
5253   }
5254   return dst;
5255 }
5256 
5257 
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5258 LogicVRegister Simulator::fneg(VectorFormat vform,
5259                                LogicVRegister dst,
5260                                const LogicVRegister& src) {
5261   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5262     fneg<SimFloat16>(vform, dst, src);
5263   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5264     fneg<float>(vform, dst, src);
5265   } else {
5266     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5267     fneg<double>(vform, dst, src);
5268   }
5269   return dst;
5270 }
5271 
5272 
5273 template <typename T>
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5274 LogicVRegister Simulator::fabs_(VectorFormat vform,
5275                                 LogicVRegister dst,
5276                                 const LogicVRegister& src) {
5277   dst.ClearForWrite(vform);
5278   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5279     T op = src.Float<T>(i);
5280     if (copysign(1.0, op) < 0.0) {
5281       op = -op;
5282     }
5283     dst.SetFloat(i, op);
5284   }
5285   return dst;
5286 }
5287 
5288 
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5289 LogicVRegister Simulator::fabs_(VectorFormat vform,
5290                                 LogicVRegister dst,
5291                                 const LogicVRegister& src) {
5292   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5293     fabs_<SimFloat16>(vform, dst, src);
5294   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5295     fabs_<float>(vform, dst, src);
5296   } else {
5297     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5298     fabs_<double>(vform, dst, src);
5299   }
5300   return dst;
5301 }
5302 
5303 
fabd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5304 LogicVRegister Simulator::fabd(VectorFormat vform,
5305                                LogicVRegister dst,
5306                                const LogicVRegister& src1,
5307                                const LogicVRegister& src2) {
5308   SimVRegister temp;
5309   fsub(vform, temp, src1, src2);
5310   fabs_(vform, dst, temp);
5311   return dst;
5312 }
5313 
5314 
fsqrt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5315 LogicVRegister Simulator::fsqrt(VectorFormat vform,
5316                                 LogicVRegister dst,
5317                                 const LogicVRegister& src) {
5318   dst.ClearForWrite(vform);
5319   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5320     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5321       SimFloat16 result = FPSqrt(src.Float<SimFloat16>(i));
5322       dst.SetFloat(i, result);
5323     }
5324   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5325     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5326       float result = FPSqrt(src.Float<float>(i));
5327       dst.SetFloat(i, result);
5328     }
5329   } else {
5330     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5331     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5332       double result = FPSqrt(src.Float<double>(i));
5333       dst.SetFloat(i, result);
5334     }
5335   }
5336   return dst;
5337 }
5338 
5339 
// Defines both overloads of every floating-point pairwise operation listed in
// NEON_FPPAIRWISE_LIST:
//  - The two-source vector form runs uzp1 and uzp2 on (src1, src2) into two
//    scratch registers and applies the element-wise operation FN to them.
//  - The single-source scalar form applies OP to lanes 0 and 1 of src and
//    stores the result to lane 0 of dst. The format must be kFormatH,
//    kFormatS or kFormatD. ClearForWrite runs only after both source lanes
//    have been read and the result has been stored.
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                                    \
  LogicVRegister Simulator::FNP(VectorFormat vform,                            \
                                LogicVRegister dst,                            \
                                const LogicVRegister& src1,                    \
                                const LogicVRegister& src2) {                  \
    SimVRegister unzipped1, unzipped2;                                         \
    uzp1(vform, unzipped1, src1, src2);                                        \
    uzp2(vform, unzipped2, src1, src2);                                        \
    FN(vform, dst, unzipped1, unzipped2);                                      \
    return dst;                                                                \
  }                                                                            \
                                                                               \
  LogicVRegister Simulator::FNP(VectorFormat vform,                            \
                                LogicVRegister dst,                            \
                                const LogicVRegister& src) {                   \
    if (vform == kFormatH) {                                                   \
      SimFloat16 lhs(RawbitsToFloat16(src.Uint(vform, 0)));                    \
      SimFloat16 rhs(RawbitsToFloat16(src.Uint(vform, 1)));                    \
      SimFloat16 result(OP(lhs, rhs));                                         \
      dst.SetUint(vform, 0, Float16ToRawbits(result));                         \
    } else if (vform == kFormatS) {                                            \
      const float lhs = src.Float<float>(0);                                   \
      const float rhs = src.Float<float>(1);                                   \
      const float result = OP(lhs, rhs);                                       \
      dst.SetFloat(0, result);                                                 \
    } else {                                                                   \
      VIXL_ASSERT(vform == kFormatD);                                          \
      const double lhs = src.Float<double>(0);                                 \
      const double rhs = src.Float<double>(1);                                 \
      const double result = OP(lhs, rhs);                                      \
      dst.SetFloat(0, result);                                                 \
    }                                                                          \
    dst.ClearForWrite(vform);                                                  \
    return dst;                                                                \
  }
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
5372 
5373 template <typename T>
5374 LogicVRegister Simulator::FPPairedAcrossHelper(VectorFormat vform,
5375                                                LogicVRegister dst,
5376                                                const LogicVRegister& src,
5377                                                typename TFPPairOp<T>::type fn,
5378                                                uint64_t inactive_value) {
5379   int lane_count = LaneCountFromFormat(vform);
5380   T result[kZRegMaxSizeInBytes / sizeof(T)];
5381   // Copy the source vector into a working array. Initialise the unused elements
5382   // at the end of the array to the same value that a false predicate would set.
5383   for (int i = 0; i < static_cast<int>(ArrayLength(result)); i++) {
5384     result[i] = (i < lane_count)
5385                     ? src.Float<T>(i)
5386                     : RawbitsWithSizeToFP<T>(sizeof(T) * 8, inactive_value);
5387   }
5388 
5389   // Pairwise reduce the elements to a single value, using the pair op function
5390   // argument.
5391   for (int step = 1; step < lane_count; step *= 2) {
5392     for (int i = 0; i < lane_count; i += step * 2) {
5393       result[i] = (this->*fn)(result[i], result[i + step]);
5394     }
5395   }
5396   dst.ClearForWrite(ScalarFormatFromFormat(vform));
5397   dst.SetFloat<T>(0, result[0]);
5398   return dst;
5399 }
5400 
FPPairedAcrossHelper(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,typename TFPPairOp<SimFloat16>::type fn16,typename TFPPairOp<float>::type fn32,typename TFPPairOp<double>::type fn64,uint64_t inactive_value)5401 LogicVRegister Simulator::FPPairedAcrossHelper(
5402     VectorFormat vform,
5403     LogicVRegister dst,
5404     const LogicVRegister& src,
5405     typename TFPPairOp<SimFloat16>::type fn16,
5406     typename TFPPairOp<float>::type fn32,
5407     typename TFPPairOp<double>::type fn64,
5408     uint64_t inactive_value) {
5409   switch (LaneSizeInBitsFromFormat(vform)) {
5410     case kHRegSize:
5411       return FPPairedAcrossHelper<SimFloat16>(vform,
5412                                               dst,
5413                                               src,
5414                                               fn16,
5415                                               inactive_value);
5416     case kSRegSize:
5417       return FPPairedAcrossHelper<float>(vform, dst, src, fn32, inactive_value);
5418     default:
5419       VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5420       return FPPairedAcrossHelper<double>(vform,
5421                                           dst,
5422                                           src,
5423                                           fn64,
5424                                           inactive_value);
5425   }
5426 }
5427 
faddv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5428 LogicVRegister Simulator::faddv(VectorFormat vform,
5429                                 LogicVRegister dst,
5430                                 const LogicVRegister& src) {
5431   return FPPairedAcrossHelper(vform,
5432                               dst,
5433                               src,
5434                               &Simulator::FPAdd<SimFloat16>,
5435                               &Simulator::FPAdd<float>,
5436                               &Simulator::FPAdd<double>,
5437                               0);
5438 }
5439 
fmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5440 LogicVRegister Simulator::fmaxv(VectorFormat vform,
5441                                 LogicVRegister dst,
5442                                 const LogicVRegister& src) {
5443   int lane_size = LaneSizeInBitsFromFormat(vform);
5444   uint64_t inactive_value =
5445       FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity);
5446   return FPPairedAcrossHelper(vform,
5447                               dst,
5448                               src,
5449                               &Simulator::FPMax<SimFloat16>,
5450                               &Simulator::FPMax<float>,
5451                               &Simulator::FPMax<double>,
5452                               inactive_value);
5453 }
5454 
5455 
fminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5456 LogicVRegister Simulator::fminv(VectorFormat vform,
5457                                 LogicVRegister dst,
5458                                 const LogicVRegister& src) {
5459   int lane_size = LaneSizeInBitsFromFormat(vform);
5460   uint64_t inactive_value =
5461       FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity);
5462   return FPPairedAcrossHelper(vform,
5463                               dst,
5464                               src,
5465                               &Simulator::FPMin<SimFloat16>,
5466                               &Simulator::FPMin<float>,
5467                               &Simulator::FPMin<double>,
5468                               inactive_value);
5469 }
5470 
5471 
fmaxnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5472 LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
5473                                   LogicVRegister dst,
5474                                   const LogicVRegister& src) {
5475   int lane_size = LaneSizeInBitsFromFormat(vform);
5476   uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5477   return FPPairedAcrossHelper(vform,
5478                               dst,
5479                               src,
5480                               &Simulator::FPMaxNM<SimFloat16>,
5481                               &Simulator::FPMaxNM<float>,
5482                               &Simulator::FPMaxNM<double>,
5483                               inactive_value);
5484 }
5485 
5486 
fminnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5487 LogicVRegister Simulator::fminnmv(VectorFormat vform,
5488                                   LogicVRegister dst,
5489                                   const LogicVRegister& src) {
5490   int lane_size = LaneSizeInBitsFromFormat(vform);
5491   uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5492   return FPPairedAcrossHelper(vform,
5493                               dst,
5494                               src,
5495                               &Simulator::FPMinNM<SimFloat16>,
5496                               &Simulator::FPMinNM<float>,
5497                               &Simulator::FPMinNM<double>,
5498                               inactive_value);
5499 }
5500 
5501 
fmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5502 LogicVRegister Simulator::fmul(VectorFormat vform,
5503                                LogicVRegister dst,
5504                                const LogicVRegister& src1,
5505                                const LogicVRegister& src2,
5506                                int index) {
5507   dst.ClearForWrite(vform);
5508   SimVRegister temp;
5509   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5510     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5511     fmul<SimFloat16>(vform, dst, src1, index_reg);
5512   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5513     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5514     fmul<float>(vform, dst, src1, index_reg);
5515   } else {
5516     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5517     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5518     fmul<double>(vform, dst, src1, index_reg);
5519   }
5520   return dst;
5521 }
5522 
5523 
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5524 LogicVRegister Simulator::fmla(VectorFormat vform,
5525                                LogicVRegister dst,
5526                                const LogicVRegister& src1,
5527                                const LogicVRegister& src2,
5528                                int index) {
5529   dst.ClearForWrite(vform);
5530   SimVRegister temp;
5531   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5532     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5533     fmla<SimFloat16>(vform, dst, dst, src1, index_reg);
5534   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5535     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5536     fmla<float>(vform, dst, dst, src1, index_reg);
5537   } else {
5538     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5539     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5540     fmla<double>(vform, dst, dst, src1, index_reg);
5541   }
5542   return dst;
5543 }
5544 
5545 
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5546 LogicVRegister Simulator::fmls(VectorFormat vform,
5547                                LogicVRegister dst,
5548                                const LogicVRegister& src1,
5549                                const LogicVRegister& src2,
5550                                int index) {
5551   dst.ClearForWrite(vform);
5552   SimVRegister temp;
5553   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5554     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5555     fmls<SimFloat16>(vform, dst, dst, src1, index_reg);
5556   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5557     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5558     fmls<float>(vform, dst, dst, src1, index_reg);
5559   } else {
5560     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5561     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5562     fmls<double>(vform, dst, dst, src1, index_reg);
5563   }
5564   return dst;
5565 }
5566 
5567 
fmulx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5568 LogicVRegister Simulator::fmulx(VectorFormat vform,
5569                                 LogicVRegister dst,
5570                                 const LogicVRegister& src1,
5571                                 const LogicVRegister& src2,
5572                                 int index) {
5573   dst.ClearForWrite(vform);
5574   SimVRegister temp;
5575   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5576     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5577     fmulx<SimFloat16>(vform, dst, src1, index_reg);
5578   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5579     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5580     fmulx<float>(vform, dst, src1, index_reg);
5581   } else {
5582     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5583     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5584     fmulx<double>(vform, dst, src1, index_reg);
5585   }
5586   return dst;
5587 }
5588 
5589 
frint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,bool inexact_exception,FrintMode frint_mode)5590 LogicVRegister Simulator::frint(VectorFormat vform,
5591                                 LogicVRegister dst,
5592                                 const LogicVRegister& src,
5593                                 FPRounding rounding_mode,
5594                                 bool inexact_exception,
5595                                 FrintMode frint_mode) {
5596   dst.ClearForWrite(vform);
5597   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5598     VIXL_ASSERT(frint_mode == kFrintToInteger);
5599     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5600       SimFloat16 input = src.Float<SimFloat16>(i);
5601       SimFloat16 rounded = FPRoundInt(input, rounding_mode);
5602       if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5603         FPProcessException();
5604       }
5605       dst.SetFloat<SimFloat16>(i, rounded);
5606     }
5607   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5608     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5609       float input = src.Float<float>(i);
5610       float rounded = FPRoundInt(input, rounding_mode, frint_mode);
5611 
5612       if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5613         FPProcessException();
5614       }
5615       dst.SetFloat<float>(i, rounded);
5616     }
5617   } else {
5618     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5619     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5620       double input = src.Float<double>(i);
5621       double rounded = FPRoundInt(input, rounding_mode, frint_mode);
5622       if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5623         FPProcessException();
5624       }
5625       dst.SetFloat<double>(i, rounded);
5626     }
5627   }
5628   return dst;
5629 }
5630 
fcvt(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)5631 LogicVRegister Simulator::fcvt(VectorFormat vform,
5632                                unsigned dst_data_size_in_bits,
5633                                unsigned src_data_size_in_bits,
5634                                LogicVRegister dst,
5635                                const LogicPRegister& pg,
5636                                const LogicVRegister& src) {
5637   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5638   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5639 
5640   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5641     if (!pg.IsActive(vform, i)) continue;
5642 
5643     uint64_t src_raw_bits = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5644                                                       0,
5645                                                       src.Uint(vform, i));
5646     double dst_value =
5647         RawbitsWithSizeToFP<double>(src_data_size_in_bits, src_raw_bits);
5648 
5649     uint64_t dst_raw_bits =
5650         FPToRawbitsWithSize(dst_data_size_in_bits, dst_value);
5651 
5652     dst.SetUint(vform, i, dst_raw_bits);
5653   }
5654 
5655   return dst;
5656 }
5657 
fcvts(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)5658 LogicVRegister Simulator::fcvts(VectorFormat vform,
5659                                 unsigned dst_data_size_in_bits,
5660                                 unsigned src_data_size_in_bits,
5661                                 LogicVRegister dst,
5662                                 const LogicPRegister& pg,
5663                                 const LogicVRegister& src,
5664                                 FPRounding round,
5665                                 int fbits) {
5666   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5667   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5668 
5669   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5670     if (!pg.IsActive(vform, i)) continue;
5671 
5672     uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5673                                                0,
5674                                                src.Uint(vform, i));
5675     double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5676                     std::pow(2.0, fbits);
5677 
5678     switch (dst_data_size_in_bits) {
5679       case kHRegSize:
5680         dst.SetInt(vform, i, FPToInt16(result, round));
5681         break;
5682       case kSRegSize:
5683         dst.SetInt(vform, i, FPToInt32(result, round));
5684         break;
5685       case kDRegSize:
5686         dst.SetInt(vform, i, FPToInt64(result, round));
5687         break;
5688       default:
5689         VIXL_UNIMPLEMENTED();
5690         break;
5691     }
5692   }
5693 
5694   return dst;
5695 }
5696 
fcvts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round,int fbits)5697 LogicVRegister Simulator::fcvts(VectorFormat vform,
5698                                 LogicVRegister dst,
5699                                 const LogicVRegister& src,
5700                                 FPRounding round,
5701                                 int fbits) {
5702   dst.ClearForWrite(vform);
5703   return fcvts(vform,
5704                LaneSizeInBitsFromFormat(vform),
5705                LaneSizeInBitsFromFormat(vform),
5706                dst,
5707                GetPTrue(),
5708                src,
5709                round,
5710                fbits);
5711 }
5712 
fcvtu(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)5713 LogicVRegister Simulator::fcvtu(VectorFormat vform,
5714                                 unsigned dst_data_size_in_bits,
5715                                 unsigned src_data_size_in_bits,
5716                                 LogicVRegister dst,
5717                                 const LogicPRegister& pg,
5718                                 const LogicVRegister& src,
5719                                 FPRounding round,
5720                                 int fbits) {
5721   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5722   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5723 
5724   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5725     if (!pg.IsActive(vform, i)) continue;
5726 
5727     uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5728                                                0,
5729                                                src.Uint(vform, i));
5730     double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5731                     std::pow(2.0, fbits);
5732 
5733     switch (dst_data_size_in_bits) {
5734       case kHRegSize:
5735         dst.SetUint(vform, i, FPToUInt16(result, round));
5736         break;
5737       case kSRegSize:
5738         dst.SetUint(vform, i, FPToUInt32(result, round));
5739         break;
5740       case kDRegSize:
5741         dst.SetUint(vform, i, FPToUInt64(result, round));
5742         break;
5743       default:
5744         VIXL_UNIMPLEMENTED();
5745         break;
5746     }
5747   }
5748 
5749   return dst;
5750 }
5751 
fcvtu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round,int fbits)5752 LogicVRegister Simulator::fcvtu(VectorFormat vform,
5753                                 LogicVRegister dst,
5754                                 const LogicVRegister& src,
5755                                 FPRounding round,
5756                                 int fbits) {
5757   dst.ClearForWrite(vform);
5758   return fcvtu(vform,
5759                LaneSizeInBitsFromFormat(vform),
5760                LaneSizeInBitsFromFormat(vform),
5761                dst,
5762                GetPTrue(),
5763                src,
5764                round,
5765                fbits);
5766 }
5767 
fcvtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5768 LogicVRegister Simulator::fcvtl(VectorFormat vform,
5769                                 LogicVRegister dst,
5770                                 const LogicVRegister& src) {
5771   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5772     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
5773       // TODO: Full support for SimFloat16 in SimRegister(s).
5774       dst.SetFloat(i,
5775                    FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(i)),
5776                              ReadDN()));
5777     }
5778   } else {
5779     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5780     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
5781       dst.SetFloat(i, FPToDouble(src.Float<float>(i), ReadDN()));
5782     }
5783   }
5784   return dst;
5785 }
5786 
5787 
fcvtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5788 LogicVRegister Simulator::fcvtl2(VectorFormat vform,
5789                                  LogicVRegister dst,
5790                                  const LogicVRegister& src) {
5791   int lane_count = LaneCountFromFormat(vform);
5792   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5793     for (int i = 0; i < lane_count; i++) {
5794       // TODO: Full support for SimFloat16 in SimRegister(s).
5795       dst.SetFloat(i,
5796                    FPToFloat(RawbitsToFloat16(
5797                                  src.Float<uint16_t>(i + lane_count)),
5798                              ReadDN()));
5799     }
5800   } else {
5801     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5802     for (int i = 0; i < lane_count; i++) {
5803       dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count), ReadDN()));
5804     }
5805   }
5806   return dst;
5807 }
5808 
5809 
fcvtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5810 LogicVRegister Simulator::fcvtn(VectorFormat vform,
5811                                 LogicVRegister dst,
5812                                 const LogicVRegister& src) {
5813   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5814     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5815       dst.SetFloat(i,
5816                    Float16ToRawbits(
5817                        FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
5818     }
5819   } else {
5820     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5821     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5822       dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));
5823     }
5824   }
5825   return dst;
5826 }
5827 
5828 
fcvtn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5829 LogicVRegister Simulator::fcvtn2(VectorFormat vform,
5830                                  LogicVRegister dst,
5831                                  const LogicVRegister& src) {
5832   int lane_count = LaneCountFromFormat(vform) / 2;
5833   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5834     for (int i = lane_count - 1; i >= 0; i--) {
5835       dst.SetFloat(i + lane_count,
5836                    Float16ToRawbits(
5837                        FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
5838     }
5839   } else {
5840     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5841     for (int i = lane_count - 1; i >= 0; i--) {
5842       dst.SetFloat(i + lane_count,
5843                    FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));
5844     }
5845   }
5846   return dst;
5847 }
5848 
5849 
fcvtxn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5850 LogicVRegister Simulator::fcvtxn(VectorFormat vform,
5851                                  LogicVRegister dst,
5852                                  const LogicVRegister& src) {
5853   dst.ClearForWrite(vform);
5854   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5855   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5856     dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
5857   }
5858   return dst;
5859 }
5860 
5861 
fcvtxn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5862 LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
5863                                   LogicVRegister dst,
5864                                   const LogicVRegister& src) {
5865   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5866   int lane_count = LaneCountFromFormat(vform) / 2;
5867   for (int i = lane_count - 1; i >= 0; i--) {
5868     dst.SetFloat(i + lane_count,
5869                  FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
5870   }
5871   return dst;
5872 }
5873 
5874 
5875 // Based on reference C function recip_sqrt_estimate from ARM ARM.
recip_sqrt_estimate(double a)5876 double Simulator::recip_sqrt_estimate(double a) {
5877   int q0, q1, s;
5878   double r;
5879   if (a < 0.5) {
5880     q0 = static_cast<int>(a * 512.0);
5881     r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
5882   } else {
5883     q1 = static_cast<int>(a * 256.0);
5884     r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
5885   }
5886   s = static_cast<int>(256.0 * r + 0.5);
5887   return static_cast<double>(s) / 256.0;
5888 }
5889 
5890 
Bits(uint64_t val,int start_bit,int end_bit)5891 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
5892   return ExtractUnsignedBitfield64(start_bit, end_bit, val);
5893 }
5894 
5895 
5896 template <typename T>
FPRecipSqrtEstimate(T op)5897 T Simulator::FPRecipSqrtEstimate(T op) {
5898   if (IsNaN(op)) {
5899     return FPProcessNaN(op);
5900   } else if (op == 0.0) {
5901     if (copysign(1.0, op) < 0.0) {
5902       return kFP64NegativeInfinity;
5903     } else {
5904       return kFP64PositiveInfinity;
5905     }
5906   } else if (copysign(1.0, op) < 0.0) {
5907     FPProcessException();
5908     return FPDefaultNaN<T>();
5909   } else if (IsInf(op)) {
5910     return 0.0;
5911   } else {
5912     uint64_t fraction;
5913     int exp, result_exp;
5914 
5915     if (IsFloat16<T>()) {
5916       exp = Float16Exp(op);
5917       fraction = Float16Mantissa(op);
5918       fraction <<= 42;
5919     } else if (IsFloat32<T>()) {
5920       exp = FloatExp(op);
5921       fraction = FloatMantissa(op);
5922       fraction <<= 29;
5923     } else {
5924       VIXL_ASSERT(IsFloat64<T>());
5925       exp = DoubleExp(op);
5926       fraction = DoubleMantissa(op);
5927     }
5928 
5929     if (exp == 0) {
5930       while (Bits(fraction, 51, 51) == 0) {
5931         fraction = Bits(fraction, 50, 0) << 1;
5932         exp -= 1;
5933       }
5934       fraction = Bits(fraction, 50, 0) << 1;
5935     }
5936 
5937     double scaled;
5938     if (Bits(exp, 0, 0) == 0) {
5939       scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
5940     } else {
5941       scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
5942     }
5943 
5944     if (IsFloat16<T>()) {
5945       result_exp = (44 - exp) / 2;
5946     } else if (IsFloat32<T>()) {
5947       result_exp = (380 - exp) / 2;
5948     } else {
5949       VIXL_ASSERT(IsFloat64<T>());
5950       result_exp = (3068 - exp) / 2;
5951     }
5952 
5953     uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
5954 
5955     if (IsFloat16<T>()) {
5956       uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
5957       uint16_t est_bits = static_cast<uint16_t>(Bits(estimate, 51, 42));
5958       return Float16Pack(0, exp_bits, est_bits);
5959     } else if (IsFloat32<T>()) {
5960       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
5961       uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
5962       return FloatPack(0, exp_bits, est_bits);
5963     } else {
5964       VIXL_ASSERT(IsFloat64<T>());
5965       return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
5966     }
5967   }
5968 }
5969 
5970 
frsqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5971 LogicVRegister Simulator::frsqrte(VectorFormat vform,
5972                                   LogicVRegister dst,
5973                                   const LogicVRegister& src) {
5974   dst.ClearForWrite(vform);
5975   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5976     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5977       SimFloat16 input = src.Float<SimFloat16>(i);
5978       dst.SetFloat(vform, i, FPRecipSqrtEstimate<SimFloat16>(input));
5979     }
5980   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5981     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5982       float input = src.Float<float>(i);
5983       dst.SetFloat(vform, i, FPRecipSqrtEstimate<float>(input));
5984     }
5985   } else {
5986     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5987     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5988       double input = src.Float<double>(i);
5989       dst.SetFloat(vform, i, FPRecipSqrtEstimate<double>(input));
5990     }
5991   }
5992   return dst;
5993 }
5994 
5995 template <typename T>
FPRecipEstimate(T op,FPRounding rounding)5996 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
5997   uint32_t sign;
5998 
5999   if (IsFloat16<T>()) {
6000     sign = Float16Sign(op);
6001   } else if (IsFloat32<T>()) {
6002     sign = FloatSign(op);
6003   } else {
6004     VIXL_ASSERT(IsFloat64<T>());
6005     sign = DoubleSign(op);
6006   }
6007 
6008   if (IsNaN(op)) {
6009     return FPProcessNaN(op);
6010   } else if (IsInf(op)) {
6011     return (sign == 1) ? -0.0 : 0.0;
6012   } else if (op == 0.0) {
6013     FPProcessException();  // FPExc_DivideByZero exception.
6014     return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6015   } else if ((IsFloat16<T>() && (std::fabs(op) < std::pow(2.0, -16.0))) ||
6016              (IsFloat32<T>() && (std::fabs(op) < std::pow(2.0, -128.0))) ||
6017              (IsFloat64<T>() && (std::fabs(op) < std::pow(2.0, -1024.0)))) {
6018     bool overflow_to_inf = false;
6019     switch (rounding) {
6020       case FPTieEven:
6021         overflow_to_inf = true;
6022         break;
6023       case FPPositiveInfinity:
6024         overflow_to_inf = (sign == 0);
6025         break;
6026       case FPNegativeInfinity:
6027         overflow_to_inf = (sign == 1);
6028         break;
6029       case FPZero:
6030         overflow_to_inf = false;
6031         break;
6032       default:
6033         break;
6034     }
6035     FPProcessException();  // FPExc_Overflow and FPExc_Inexact.
6036     if (overflow_to_inf) {
6037       return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6038     } else {
6039       // Return FPMaxNormal(sign).
6040       if (IsFloat16<T>()) {
6041         return Float16Pack(sign, 0x1f, 0x3ff);
6042       } else if (IsFloat32<T>()) {
6043         return FloatPack(sign, 0xfe, 0x07fffff);
6044       } else {
6045         VIXL_ASSERT(IsFloat64<T>());
6046         return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
6047       }
6048     }
6049   } else {
6050     uint64_t fraction;
6051     int exp, result_exp;
6052     uint32_t sign;
6053 
6054     if (IsFloat16<T>()) {
6055       sign = Float16Sign(op);
6056       exp = Float16Exp(op);
6057       fraction = Float16Mantissa(op);
6058       fraction <<= 42;
6059     } else if (IsFloat32<T>()) {
6060       sign = FloatSign(op);
6061       exp = FloatExp(op);
6062       fraction = FloatMantissa(op);
6063       fraction <<= 29;
6064     } else {
6065       VIXL_ASSERT(IsFloat64<T>());
6066       sign = DoubleSign(op);
6067       exp = DoubleExp(op);
6068       fraction = DoubleMantissa(op);
6069     }
6070 
6071     if (exp == 0) {
6072       if (Bits(fraction, 51, 51) == 0) {
6073         exp -= 1;
6074         fraction = Bits(fraction, 49, 0) << 2;
6075       } else {
6076         fraction = Bits(fraction, 50, 0) << 1;
6077       }
6078     }
6079 
6080     double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6081 
6082     if (IsFloat16<T>()) {
6083       result_exp = (29 - exp);  // In range 29-30 = -1 to 29+1 = 30.
6084     } else if (IsFloat32<T>()) {
6085       result_exp = (253 - exp);  // In range 253-254 = -1 to 253+1 = 254.
6086     } else {
6087       VIXL_ASSERT(IsFloat64<T>());
6088       result_exp = (2045 - exp);  // In range 2045-2046 = -1 to 2045+1 = 2046.
6089     }
6090 
6091     double estimate = recip_estimate(scaled);
6092 
6093     fraction = DoubleMantissa(estimate);
6094     if (result_exp == 0) {
6095       fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
6096     } else if (result_exp == -1) {
6097       fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
6098       result_exp = 0;
6099     }
6100     if (IsFloat16<T>()) {
6101       uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6102       uint16_t frac_bits = static_cast<uint16_t>(Bits(fraction, 51, 42));
6103       return Float16Pack(sign, exp_bits, frac_bits);
6104     } else if (IsFloat32<T>()) {
6105       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6106       uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
6107       return FloatPack(sign, exp_bits, frac_bits);
6108     } else {
6109       VIXL_ASSERT(IsFloat64<T>());
6110       return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
6111     }
6112   }
6113 }
6114 
6115 
frecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round)6116 LogicVRegister Simulator::frecpe(VectorFormat vform,
6117                                  LogicVRegister dst,
6118                                  const LogicVRegister& src,
6119                                  FPRounding round) {
6120   dst.ClearForWrite(vform);
6121   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6122     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6123       SimFloat16 input = src.Float<SimFloat16>(i);
6124       dst.SetFloat(vform, i, FPRecipEstimate<SimFloat16>(input, round));
6125     }
6126   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6127     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6128       float input = src.Float<float>(i);
6129       dst.SetFloat(vform, i, FPRecipEstimate<float>(input, round));
6130     }
6131   } else {
6132     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6133     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6134       double input = src.Float<double>(i);
6135       dst.SetFloat(vform, i, FPRecipEstimate<double>(input, round));
6136     }
6137   }
6138   return dst;
6139 }
6140 
6141 
ursqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6142 LogicVRegister Simulator::ursqrte(VectorFormat vform,
6143                                   LogicVRegister dst,
6144                                   const LogicVRegister& src) {
6145   dst.ClearForWrite(vform);
6146   uint64_t operand;
6147   uint32_t result;
6148   double dp_operand, dp_result;
6149   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6150     operand = src.Uint(vform, i);
6151     if (operand <= 0x3FFFFFFF) {
6152       result = 0xFFFFFFFF;
6153     } else {
6154       dp_operand = operand * std::pow(2.0, -32);
6155       dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
6156       result = static_cast<uint32_t>(dp_result);
6157     }
6158     dst.SetUint(vform, i, result);
6159   }
6160   return dst;
6161 }
6162 
6163 
6164 // Based on reference C function recip_estimate from ARM ARM.
recip_estimate(double a)6165 double Simulator::recip_estimate(double a) {
6166   int q, s;
6167   double r;
6168   q = static_cast<int>(a * 512.0);
6169   r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
6170   s = static_cast<int>(256.0 * r + 0.5);
6171   return static_cast<double>(s) / 256.0;
6172 }
6173 
6174 
urecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6175 LogicVRegister Simulator::urecpe(VectorFormat vform,
6176                                  LogicVRegister dst,
6177                                  const LogicVRegister& src) {
6178   dst.ClearForWrite(vform);
6179   uint64_t operand;
6180   uint32_t result;
6181   double dp_operand, dp_result;
6182   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6183     operand = src.Uint(vform, i);
6184     if (operand <= 0x7FFFFFFF) {
6185       result = 0xFFFFFFFF;
6186     } else {
6187       dp_operand = operand * std::pow(2.0, -32);
6188       dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
6189       result = static_cast<uint32_t>(dp_result);
6190     }
6191     dst.SetUint(vform, i, result);
6192   }
6193   return dst;
6194 }
6195 
pfalse(LogicPRegister dst)6196 LogicPRegister Simulator::pfalse(LogicPRegister dst) {
6197   dst.Clear();
6198   return dst;
6199 }
6200 
pfirst(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)6201 LogicPRegister Simulator::pfirst(LogicPRegister dst,
6202                                  const LogicPRegister& pg,
6203                                  const LogicPRegister& src) {
6204   int first_pg = GetFirstActive(kFormatVnB, pg);
6205   VIXL_ASSERT(first_pg < LaneCountFromFormat(kFormatVnB));
6206   mov(dst, src);
6207   if (first_pg >= 0) dst.SetActive(kFormatVnB, first_pg, true);
6208   return dst;
6209 }
6210 
ptrue(VectorFormat vform,LogicPRegister dst,int pattern)6211 LogicPRegister Simulator::ptrue(VectorFormat vform,
6212                                 LogicPRegister dst,
6213                                 int pattern) {
6214   int count = GetPredicateConstraintLaneCount(vform, pattern);
6215   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6216     dst.SetActive(vform, i, i < count);
6217   }
6218   return dst;
6219 }
6220 
pnext(VectorFormat vform,LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)6221 LogicPRegister Simulator::pnext(VectorFormat vform,
6222                                 LogicPRegister dst,
6223                                 const LogicPRegister& pg,
6224                                 const LogicPRegister& src) {
6225   int next = GetLastActive(vform, src) + 1;
6226   while (next < LaneCountFromFormat(vform)) {
6227     if (pg.IsActive(vform, next)) break;
6228     next++;
6229   }
6230 
6231   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6232     dst.SetActive(vform, i, (i == next));
6233   }
6234   return dst;
6235 }
6236 
6237 template <typename T>
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6238 LogicVRegister Simulator::frecpx(VectorFormat vform,
6239                                  LogicVRegister dst,
6240                                  const LogicVRegister& src) {
6241   dst.ClearForWrite(vform);
6242   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6243     T op = src.Float<T>(i);
6244     T result;
6245     if (IsNaN(op)) {
6246       result = FPProcessNaN(op);
6247     } else {
6248       int exp;
6249       uint32_t sign;
6250       if (IsFloat16<T>()) {
6251         sign = Float16Sign(op);
6252         exp = Float16Exp(op);
6253         exp = (exp == 0) ? (0x1F - 1) : static_cast<int>(Bits(~exp, 4, 0));
6254         result = Float16Pack(sign, exp, 0);
6255       } else if (IsFloat32<T>()) {
6256         sign = FloatSign(op);
6257         exp = FloatExp(op);
6258         exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
6259         result = FloatPack(sign, exp, 0);
6260       } else {
6261         VIXL_ASSERT(IsFloat64<T>());
6262         sign = DoubleSign(op);
6263         exp = DoubleExp(op);
6264         exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
6265         result = DoublePack(sign, exp, 0);
6266       }
6267     }
6268     dst.SetFloat(i, result);
6269   }
6270   return dst;
6271 }
6272 
6273 
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6274 LogicVRegister Simulator::frecpx(VectorFormat vform,
6275                                  LogicVRegister dst,
6276                                  const LogicVRegister& src) {
6277   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6278     frecpx<SimFloat16>(vform, dst, src);
6279   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6280     frecpx<float>(vform, dst, src);
6281   } else {
6282     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6283     frecpx<double>(vform, dst, src);
6284   }
6285   return dst;
6286 }
6287 
ftsmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6288 LogicVRegister Simulator::ftsmul(VectorFormat vform,
6289                                  LogicVRegister dst,
6290                                  const LogicVRegister& src1,
6291                                  const LogicVRegister& src2) {
6292   SimVRegister maybe_neg_src1;
6293 
6294   // The bottom bit of src2 controls the sign of the result. Use it to
6295   // conditionally invert the sign of one `fmul` operand.
6296   shl(vform, maybe_neg_src1, src2, LaneSizeInBitsFromFormat(vform) - 1);
6297   eor(vform, maybe_neg_src1, maybe_neg_src1, src1);
6298 
6299   // Multiply src1 by the modified neg_src1, which is potentially its negation.
6300   // In the case of NaNs, NaN * -NaN will return the first NaN intact, so src1,
6301   // rather than neg_src1, must be the first source argument.
6302   fmul(vform, dst, src1, maybe_neg_src1);
6303 
6304   return dst;
6305 }
6306 
ftssel(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6307 LogicVRegister Simulator::ftssel(VectorFormat vform,
6308                                  LogicVRegister dst,
6309                                  const LogicVRegister& src1,
6310                                  const LogicVRegister& src2) {
6311   unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
6312   uint64_t sign_bit = UINT64_C(1) << (lane_bits - 1);
6313   uint64_t one;
6314 
6315   if (lane_bits == kHRegSize) {
6316     one = Float16ToRawbits(Float16(1.0));
6317   } else if (lane_bits == kSRegSize) {
6318     one = FloatToRawbits(1.0);
6319   } else {
6320     VIXL_ASSERT(lane_bits == kDRegSize);
6321     one = DoubleToRawbits(1.0);
6322   }
6323 
6324   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6325     // Use integer accessors for this operation, as this is a data manipulation
6326     // task requiring no calculation.
6327     uint64_t op = src1.Uint(vform, i);
6328 
6329     // Only the bottom two bits of the src2 register are significant, indicating
6330     // the quadrant. Bit 0 controls whether src1 or 1.0 is written to dst. Bit 1
6331     // determines the sign of the value written to dst.
6332     uint64_t q = src2.Uint(vform, i);
6333     if ((q & 1) == 1) op = one;
6334     if ((q & 2) == 2) op ^= sign_bit;
6335 
6336     dst.SetUint(vform, i, op);
6337   }
6338 
6339   return dst;
6340 }
6341 
6342 template <typename T>
FTMaddHelper(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,uint64_t coeff_pos,uint64_t coeff_neg)6343 LogicVRegister Simulator::FTMaddHelper(VectorFormat vform,
6344                                        LogicVRegister dst,
6345                                        const LogicVRegister& src1,
6346                                        const LogicVRegister& src2,
6347                                        uint64_t coeff_pos,
6348                                        uint64_t coeff_neg) {
6349   SimVRegister zero;
6350   dup_immediate(kFormatVnB, zero, 0);
6351 
6352   SimVRegister cf;
6353   SimVRegister cfn;
6354   dup_immediate(vform, cf, coeff_pos);
6355   dup_immediate(vform, cfn, coeff_neg);
6356 
6357   // The specification requires testing the top bit of the raw value, rather
6358   // than the sign of the floating point number, so use an integer comparison
6359   // here.
6360   SimPRegister is_neg;
6361   SVEIntCompareVectorsHelper(lt,
6362                              vform,
6363                              is_neg,
6364                              GetPTrue(),
6365                              src2,
6366                              zero,
6367                              false,
6368                              LeaveFlags);
6369   mov_merging(vform, cf, is_neg, cfn);
6370 
6371   SimVRegister temp;
6372   fabs_<T>(vform, temp, src2);
6373   fmla<T>(vform, cf, cf, src1, temp);
6374   mov(vform, dst, cf);
6375   return dst;
6376 }
6377 
6378 
ftmad(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,unsigned index)6379 LogicVRegister Simulator::ftmad(VectorFormat vform,
6380                                 LogicVRegister dst,
6381                                 const LogicVRegister& src1,
6382                                 const LogicVRegister& src2,
6383                                 unsigned index) {
6384   static const uint64_t ftmad_coeff16[] = {0x3c00,
6385                                            0xb155,
6386                                            0x2030,
6387                                            0x0000,
6388                                            0x0000,
6389                                            0x0000,
6390                                            0x0000,
6391                                            0x0000,
6392                                            0x3c00,
6393                                            0xb800,
6394                                            0x293a,
6395                                            0x0000,
6396                                            0x0000,
6397                                            0x0000,
6398                                            0x0000,
6399                                            0x0000};
6400 
6401   static const uint64_t ftmad_coeff32[] = {0x3f800000,
6402                                            0xbe2aaaab,
6403                                            0x3c088886,
6404                                            0xb95008b9,
6405                                            0x36369d6d,
6406                                            0x00000000,
6407                                            0x00000000,
6408                                            0x00000000,
6409                                            0x3f800000,
6410                                            0xbf000000,
6411                                            0x3d2aaaa6,
6412                                            0xbab60705,
6413                                            0x37cd37cc,
6414                                            0x00000000,
6415                                            0x00000000,
6416                                            0x00000000};
6417 
6418   static const uint64_t ftmad_coeff64[] = {0x3ff0000000000000,
6419                                            0xbfc5555555555543,
6420                                            0x3f8111111110f30c,
6421                                            0xbf2a01a019b92fc6,
6422                                            0x3ec71de351f3d22b,
6423                                            0xbe5ae5e2b60f7b91,
6424                                            0x3de5d8408868552f,
6425                                            0x0000000000000000,
6426                                            0x3ff0000000000000,
6427                                            0xbfe0000000000000,
6428                                            0x3fa5555555555536,
6429                                            0xbf56c16c16c13a0b,
6430                                            0x3efa01a019b1e8d8,
6431                                            0xbe927e4f7282f468,
6432                                            0x3e21ee96d2641b13,
6433                                            0xbda8f76380fbb401};
6434   VIXL_ASSERT((index + 8) < ArrayLength(ftmad_coeff64));
6435   VIXL_ASSERT(ArrayLength(ftmad_coeff16) == ArrayLength(ftmad_coeff64));
6436   VIXL_ASSERT(ArrayLength(ftmad_coeff32) == ArrayLength(ftmad_coeff64));
6437 
6438   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6439     FTMaddHelper<SimFloat16>(vform,
6440                              dst,
6441                              src1,
6442                              src2,
6443                              ftmad_coeff16[index],
6444                              ftmad_coeff16[index + 8]);
6445   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6446     FTMaddHelper<float>(vform,
6447                         dst,
6448                         src1,
6449                         src2,
6450                         ftmad_coeff32[index],
6451                         ftmad_coeff32[index + 8]);
6452   } else {
6453     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6454     FTMaddHelper<double>(vform,
6455                          dst,
6456                          src1,
6457                          src2,
6458                          ftmad_coeff64[index],
6459                          ftmad_coeff64[index + 8]);
6460   }
6461   return dst;
6462 }
6463 
fexpa(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6464 LogicVRegister Simulator::fexpa(VectorFormat vform,
6465                                 LogicVRegister dst,
6466                                 const LogicVRegister& src) {
6467   static const uint64_t fexpa_coeff16[] = {0x0000, 0x0016, 0x002d, 0x0045,
6468                                            0x005d, 0x0075, 0x008e, 0x00a8,
6469                                            0x00c2, 0x00dc, 0x00f8, 0x0114,
6470                                            0x0130, 0x014d, 0x016b, 0x0189,
6471                                            0x01a8, 0x01c8, 0x01e8, 0x0209,
6472                                            0x022b, 0x024e, 0x0271, 0x0295,
6473                                            0x02ba, 0x02e0, 0x0306, 0x032e,
6474                                            0x0356, 0x037f, 0x03a9, 0x03d4};
6475 
6476   static const uint64_t fexpa_coeff32[] =
6477       {0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f,
6478        0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b,
6479        0x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532,
6480        0x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a,
6481        0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 0x38fbaf,
6482        0x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75,
6483        0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd,
6484        0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a,
6485        0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 0x7a83b3,
6486        0x7d3e0c};
6487 
6488   static const uint64_t fexpa_coeff64[] =
6489       {0X0000000000000, 0X02c9a3e778061, 0X059b0d3158574, 0X0874518759bc8,
6490        0X0b5586cf9890f, 0X0e3ec32d3d1a2, 0X11301d0125b51, 0X1429aaea92de0,
6491        0X172b83c7d517b, 0X1a35beb6fcb75, 0X1d4873168b9aa, 0X2063b88628cd6,
6492        0X2387a6e756238, 0X26b4565e27cdd, 0X29e9df51fdee1, 0X2d285a6e4030b,
6493        0X306fe0a31b715, 0X33c08b26416ff, 0X371a7373aa9cb, 0X3a7db34e59ff7,
6494        0X3dea64c123422, 0X4160a21f72e2a, 0X44e086061892d, 0X486a2b5c13cd0,
6495        0X4bfdad5362a27, 0X4f9b2769d2ca7, 0X5342b569d4f82, 0X56f4736b527da,
6496        0X5ab07dd485429, 0X5e76f15ad2148, 0X6247eb03a5585, 0X6623882552225,
6497        0X6a09e667f3bcd, 0X6dfb23c651a2f, 0X71f75e8ec5f74, 0X75feb564267c9,
6498        0X7a11473eb0187, 0X7e2f336cf4e62, 0X82589994cce13, 0X868d99b4492ed,
6499        0X8ace5422aa0db, 0X8f1ae99157736, 0X93737b0cdc5e5, 0X97d829fde4e50,
6500        0X9c49182a3f090, 0Xa0c667b5de565, 0Xa5503b23e255d, 0Xa9e6b5579fdbf,
6501        0Xae89f995ad3ad, 0Xb33a2b84f15fb, 0Xb7f76f2fb5e47, 0Xbcc1e904bc1d2,
6502        0Xc199bdd85529c, 0Xc67f12e57d14b, 0Xcb720dcef9069, 0Xd072d4a07897c,
6503        0Xd5818dcfba487, 0Xda9e603db3285, 0Xdfc97337b9b5f, 0Xe502ee78b3ff6,
6504        0Xea4afa2a490da, 0Xefa1bee615a27, 0Xf50765b6e4540, 0Xfa7c1819e90d8};
6505 
6506   unsigned lane_size = LaneSizeInBitsFromFormat(vform);
6507   int index_highbit = 5;
6508   int op_highbit, op_shift;
6509   const uint64_t* fexpa_coeff;
6510 
6511   if (lane_size == kHRegSize) {
6512     index_highbit = 4;
6513     VIXL_ASSERT(ArrayLength(fexpa_coeff16) == (1U << (index_highbit + 1)));
6514     fexpa_coeff = fexpa_coeff16;
6515     op_highbit = 9;
6516     op_shift = 10;
6517   } else if (lane_size == kSRegSize) {
6518     VIXL_ASSERT(ArrayLength(fexpa_coeff32) == (1U << (index_highbit + 1)));
6519     fexpa_coeff = fexpa_coeff32;
6520     op_highbit = 13;
6521     op_shift = 23;
6522   } else {
6523     VIXL_ASSERT(lane_size == kDRegSize);
6524     VIXL_ASSERT(ArrayLength(fexpa_coeff64) == (1U << (index_highbit + 1)));
6525     fexpa_coeff = fexpa_coeff64;
6526     op_highbit = 16;
6527     op_shift = 52;
6528   }
6529 
6530   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6531     uint64_t op = src.Uint(vform, i);
6532     uint64_t result = fexpa_coeff[Bits(op, index_highbit, 0)];
6533     result |= (Bits(op, op_highbit, index_highbit + 1) << op_shift);
6534     dst.SetUint(vform, i, result);
6535   }
6536   return dst;
6537 }
6538 
6539 template <typename T>
fscale(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6540 LogicVRegister Simulator::fscale(VectorFormat vform,
6541                                  LogicVRegister dst,
6542                                  const LogicVRegister& src1,
6543                                  const LogicVRegister& src2) {
6544   T two = T(2.0);
6545   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6546     T s1 = src1.Float<T>(i);
6547     if (!IsNaN(s1)) {
6548       int64_t scale = src2.Int(vform, i);
6549       // TODO: this is a low-performance implementation, but it's simple and
6550       // less likely to be buggy. Consider replacing it with something faster.
6551 
6552       // Scales outside of these bounds become infinity or zero, so there's no
6553       // point iterating further.
6554       scale = std::min<int64_t>(std::max<int64_t>(scale, -2048), 2048);
6555 
6556       // Compute s1 * 2 ^ scale. If scale is positive, multiply by two and
6557       // decrement scale until it's zero.
6558       while (scale-- > 0) {
6559         s1 = FPMul(s1, two);
6560       }
6561 
6562       // If scale is negative, divide by two and increment scale until it's
6563       // zero. Initially, scale is (src2 - 1), so we pre-increment.
6564       while (++scale < 0) {
6565         s1 = FPDiv(s1, two);
6566       }
6567     }
6568     dst.SetFloat<T>(i, s1);
6569   }
6570   return dst;
6571 }
6572 
fscale(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6573 LogicVRegister Simulator::fscale(VectorFormat vform,
6574                                  LogicVRegister dst,
6575                                  const LogicVRegister& src1,
6576                                  const LogicVRegister& src2) {
6577   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6578     fscale<SimFloat16>(vform, dst, src1, src2);
6579   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6580     fscale<float>(vform, dst, src1, src2);
6581   } else {
6582     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6583     fscale<double>(vform, dst, src1, src2);
6584   }
6585   return dst;
6586 }
6587 
scvtf(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)6588 LogicVRegister Simulator::scvtf(VectorFormat vform,
6589                                 unsigned dst_data_size_in_bits,
6590                                 unsigned src_data_size_in_bits,
6591                                 LogicVRegister dst,
6592                                 const LogicPRegister& pg,
6593                                 const LogicVRegister& src,
6594                                 FPRounding round,
6595                                 int fbits) {
6596   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6597   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6598 
6599   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6600     if (!pg.IsActive(vform, i)) continue;
6601 
6602     int64_t value = ExtractSignedBitfield64(src_data_size_in_bits - 1,
6603                                             0,
6604                                             src.Uint(vform, i));
6605 
6606     switch (dst_data_size_in_bits) {
6607       case kHRegSize: {
6608         SimFloat16 result = FixedToFloat16(value, fbits, round);
6609         dst.SetUint(vform, i, Float16ToRawbits(result));
6610         break;
6611       }
6612       case kSRegSize: {
6613         float result = FixedToFloat(value, fbits, round);
6614         dst.SetUint(vform, i, FloatToRawbits(result));
6615         break;
6616       }
6617       case kDRegSize: {
6618         double result = FixedToDouble(value, fbits, round);
6619         dst.SetUint(vform, i, DoubleToRawbits(result));
6620         break;
6621       }
6622       default:
6623         VIXL_UNIMPLEMENTED();
6624         break;
6625     }
6626   }
6627 
6628   return dst;
6629 }
6630 
scvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)6631 LogicVRegister Simulator::scvtf(VectorFormat vform,
6632                                 LogicVRegister dst,
6633                                 const LogicVRegister& src,
6634                                 int fbits,
6635                                 FPRounding round) {
6636   return scvtf(vform,
6637                LaneSizeInBitsFromFormat(vform),
6638                LaneSizeInBitsFromFormat(vform),
6639                dst,
6640                GetPTrue(),
6641                src,
6642                round,
6643                fbits);
6644 }
6645 
ucvtf(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)6646 LogicVRegister Simulator::ucvtf(VectorFormat vform,
6647                                 unsigned dst_data_size_in_bits,
6648                                 unsigned src_data_size_in_bits,
6649                                 LogicVRegister dst,
6650                                 const LogicPRegister& pg,
6651                                 const LogicVRegister& src,
6652                                 FPRounding round,
6653                                 int fbits) {
6654   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6655   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6656 
6657   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6658     if (!pg.IsActive(vform, i)) continue;
6659 
6660     uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
6661                                                0,
6662                                                src.Uint(vform, i));
6663 
6664     switch (dst_data_size_in_bits) {
6665       case kHRegSize: {
6666         SimFloat16 result = UFixedToFloat16(value, fbits, round);
6667         dst.SetUint(vform, i, Float16ToRawbits(result));
6668         break;
6669       }
6670       case kSRegSize: {
6671         float result = UFixedToFloat(value, fbits, round);
6672         dst.SetUint(vform, i, FloatToRawbits(result));
6673         break;
6674       }
6675       case kDRegSize: {
6676         double result = UFixedToDouble(value, fbits, round);
6677         dst.SetUint(vform, i, DoubleToRawbits(result));
6678         break;
6679       }
6680       default:
6681         VIXL_UNIMPLEMENTED();
6682         break;
6683     }
6684   }
6685 
6686   return dst;
6687 }
6688 
ucvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)6689 LogicVRegister Simulator::ucvtf(VectorFormat vform,
6690                                 LogicVRegister dst,
6691                                 const LogicVRegister& src,
6692                                 int fbits,
6693                                 FPRounding round) {
6694   return ucvtf(vform,
6695                LaneSizeInBitsFromFormat(vform),
6696                LaneSizeInBitsFromFormat(vform),
6697                dst,
6698                GetPTrue(),
6699                src,
6700                round,
6701                fbits);
6702 }
6703 
unpk(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,UnpackType unpack_type,ExtendType extend_type)6704 LogicVRegister Simulator::unpk(VectorFormat vform,
6705                                LogicVRegister dst,
6706                                const LogicVRegister& src,
6707                                UnpackType unpack_type,
6708                                ExtendType extend_type) {
6709   VectorFormat vform_half = VectorFormatHalfWidth(vform);
6710   const int lane_count = LaneCountFromFormat(vform);
6711   const int src_start_lane = (unpack_type == kLoHalf) ? 0 : lane_count;
6712 
6713   switch (extend_type) {
6714     case kSignedExtend: {
6715       int64_t result[kZRegMaxSizeInBytes];
6716       for (int i = 0; i < lane_count; ++i) {
6717         result[i] = src.Int(vform_half, i + src_start_lane);
6718       }
6719       for (int i = 0; i < lane_count; ++i) {
6720         dst.SetInt(vform, i, result[i]);
6721       }
6722       break;
6723     }
6724     case kUnsignedExtend: {
6725       uint64_t result[kZRegMaxSizeInBytes];
6726       for (int i = 0; i < lane_count; ++i) {
6727         result[i] = src.Uint(vform_half, i + src_start_lane);
6728       }
6729       for (int i = 0; i < lane_count; ++i) {
6730         dst.SetUint(vform, i, result[i]);
6731       }
6732       break;
6733     }
6734     default:
6735       VIXL_UNREACHABLE();
6736   }
6737   return dst;
6738 }
6739 
SVEIntCompareVectorsHelper(Condition cond,VectorFormat vform,LogicPRegister dst,const LogicPRegister & mask,const LogicVRegister & src1,const LogicVRegister & src2,bool is_wide_elements,FlagsUpdate flags)6740 LogicPRegister Simulator::SVEIntCompareVectorsHelper(Condition cond,
6741                                                      VectorFormat vform,
6742                                                      LogicPRegister dst,
6743                                                      const LogicPRegister& mask,
6744                                                      const LogicVRegister& src1,
6745                                                      const LogicVRegister& src2,
6746                                                      bool is_wide_elements,
6747                                                      FlagsUpdate flags) {
6748   for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
6749     bool result = false;
6750     if (mask.IsActive(vform, lane)) {
6751       int64_t op1 = 0xbadbeef;
6752       int64_t op2 = 0xbadbeef;
6753       int d_lane = (lane * LaneSizeInBitsFromFormat(vform)) / kDRegSize;
6754       switch (cond) {
6755         case eq:
6756         case ge:
6757         case gt:
6758         case lt:
6759         case le:
6760         case ne:
6761           op1 = src1.Int(vform, lane);
6762           op2 = is_wide_elements ? src2.Int(kFormatVnD, d_lane)
6763                                  : src2.Int(vform, lane);
6764           break;
6765         case hi:
6766         case hs:
6767         case ls:
6768         case lo:
6769           op1 = src1.Uint(vform, lane);
6770           op2 = is_wide_elements ? src2.Uint(kFormatVnD, d_lane)
6771                                  : src2.Uint(vform, lane);
6772           break;
6773         default:
6774           VIXL_UNREACHABLE();
6775       }
6776 
6777       switch (cond) {
6778         case eq:
6779           result = (op1 == op2);
6780           break;
6781         case ne:
6782           result = (op1 != op2);
6783           break;
6784         case ge:
6785           result = (op1 >= op2);
6786           break;
6787         case gt:
6788           result = (op1 > op2);
6789           break;
6790         case le:
6791           result = (op1 <= op2);
6792           break;
6793         case lt:
6794           result = (op1 < op2);
6795           break;
6796         case hs:
6797           result = (static_cast<uint64_t>(op1) >= static_cast<uint64_t>(op2));
6798           break;
6799         case hi:
6800           result = (static_cast<uint64_t>(op1) > static_cast<uint64_t>(op2));
6801           break;
6802         case ls:
6803           result = (static_cast<uint64_t>(op1) <= static_cast<uint64_t>(op2));
6804           break;
6805         case lo:
6806           result = (static_cast<uint64_t>(op1) < static_cast<uint64_t>(op2));
6807           break;
6808         default:
6809           VIXL_UNREACHABLE();
6810       }
6811     }
6812     dst.SetActive(vform, lane, result);
6813   }
6814 
6815   if (flags == SetFlags) PredTest(vform, mask, dst);
6816 
6817   return dst;
6818 }
6819 
SVEBitwiseShiftHelper(Shift shift_op,VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_wide_elements)6820 LogicVRegister Simulator::SVEBitwiseShiftHelper(Shift shift_op,
6821                                                 VectorFormat vform,
6822                                                 LogicVRegister dst,
6823                                                 const LogicVRegister& src1,
6824                                                 const LogicVRegister& src2,
6825                                                 bool is_wide_elements) {
6826   unsigned lane_size = LaneSizeInBitsFromFormat(vform);
6827   VectorFormat shift_vform = is_wide_elements ? kFormatVnD : vform;
6828 
6829   for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
6830     int shift_src_lane = lane;
6831     if (is_wide_elements) {
6832       // If the shift amount comes from wide elements, select the D-sized lane
6833       // which occupies the corresponding lanes of the value to be shifted.
6834       shift_src_lane = (lane * lane_size) / kDRegSize;
6835     }
6836     uint64_t shift_amount = src2.Uint(shift_vform, shift_src_lane);
6837 
6838     // Saturate shift_amount to the size of the lane that will be shifted.
6839     if (shift_amount > lane_size) shift_amount = lane_size;
6840 
6841     uint64_t value = src1.Uint(vform, lane);
6842     int64_t result = ShiftOperand(lane_size,
6843                                   value,
6844                                   shift_op,
6845                                   static_cast<unsigned>(shift_amount));
6846     dst.SetUint(vform, lane, result);
6847   }
6848 
6849   return dst;
6850 }
6851 
asrd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int shift)6852 LogicVRegister Simulator::asrd(VectorFormat vform,
6853                                LogicVRegister dst,
6854                                const LogicVRegister& src1,
6855                                int shift) {
6856   VIXL_ASSERT((shift > 0) && (static_cast<unsigned>(shift) <=
6857                               LaneSizeInBitsFromFormat(vform)));
6858 
6859   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6860     int64_t value = src1.Int(vform, i);
6861     if (shift <= 63) {
6862       if (value < 0) {
6863         // The max possible mask is 0x7fff'ffff'ffff'ffff, which can be safely
6864         // cast to int64_t, and cannot cause signed overflow in the result.
6865         value = value + GetUintMask(shift);
6866       }
6867       value = ShiftOperand(kDRegSize, value, ASR, shift);
6868     } else {
6869       value = 0;
6870     }
6871     dst.SetInt(vform, i, value);
6872   }
6873   return dst;
6874 }
6875 
SVEBitwiseLogicalUnpredicatedHelper(LogicalOp logical_op,VectorFormat vform,LogicVRegister zd,const LogicVRegister & zn,const LogicVRegister & zm)6876 LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper(
6877     LogicalOp logical_op,
6878     VectorFormat vform,
6879     LogicVRegister zd,
6880     const LogicVRegister& zn,
6881     const LogicVRegister& zm) {
6882   VIXL_ASSERT(IsSVEFormat(vform));
6883   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6884     uint64_t op1 = zn.Uint(vform, i);
6885     uint64_t op2 = zm.Uint(vform, i);
6886     uint64_t result;
6887     switch (logical_op) {
6888       case AND:
6889         result = op1 & op2;
6890         break;
6891       case BIC:
6892         result = op1 & ~op2;
6893         break;
6894       case EOR:
6895         result = op1 ^ op2;
6896         break;
6897       case ORR:
6898         result = op1 | op2;
6899         break;
6900       default:
6901         result = 0;
6902         VIXL_UNIMPLEMENTED();
6903     }
6904     zd.SetUint(vform, i, result);
6905   }
6906 
6907   return zd;
6908 }
6909 
SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,LogicPRegister pd,const LogicPRegister & pn,const LogicPRegister & pm)6910 LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,
6911                                                     LogicPRegister pd,
6912                                                     const LogicPRegister& pn,
6913                                                     const LogicPRegister& pm) {
6914   for (int i = 0; i < pn.GetChunkCount(); i++) {
6915     LogicPRegister::ChunkType op1 = pn.GetChunk(i);
6916     LogicPRegister::ChunkType op2 = pm.GetChunk(i);
6917     LogicPRegister::ChunkType result;
6918     switch (op) {
6919       case ANDS_p_p_pp_z:
6920       case AND_p_p_pp_z:
6921         result = op1 & op2;
6922         break;
6923       case BICS_p_p_pp_z:
6924       case BIC_p_p_pp_z:
6925         result = op1 & ~op2;
6926         break;
6927       case EORS_p_p_pp_z:
6928       case EOR_p_p_pp_z:
6929         result = op1 ^ op2;
6930         break;
6931       case NANDS_p_p_pp_z:
6932       case NAND_p_p_pp_z:
6933         result = ~(op1 & op2);
6934         break;
6935       case NORS_p_p_pp_z:
6936       case NOR_p_p_pp_z:
6937         result = ~(op1 | op2);
6938         break;
6939       case ORNS_p_p_pp_z:
6940       case ORN_p_p_pp_z:
6941         result = op1 | ~op2;
6942         break;
6943       case ORRS_p_p_pp_z:
6944       case ORR_p_p_pp_z:
6945         result = op1 | op2;
6946         break;
6947       default:
6948         result = 0;
6949         VIXL_UNIMPLEMENTED();
6950     }
6951     pd.SetChunk(i, result);
6952   }
6953   return pd;
6954 }
6955 
SVEBitwiseImmHelper(SVEBitwiseLogicalWithImm_UnpredicatedOp op,VectorFormat vform,LogicVRegister zd,uint64_t imm)6956 LogicVRegister Simulator::SVEBitwiseImmHelper(
6957     SVEBitwiseLogicalWithImm_UnpredicatedOp op,
6958     VectorFormat vform,
6959     LogicVRegister zd,
6960     uint64_t imm) {
6961   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6962     uint64_t op1 = zd.Uint(vform, i);
6963     uint64_t result;
6964     switch (op) {
6965       case AND_z_zi:
6966         result = op1 & imm;
6967         break;
6968       case EOR_z_zi:
6969         result = op1 ^ imm;
6970         break;
6971       case ORR_z_zi:
6972         result = op1 | imm;
6973         break;
6974       default:
6975         result = 0;
6976         VIXL_UNIMPLEMENTED();
6977     }
6978     zd.SetUint(vform, i, result);
6979   }
6980 
6981   return zd;
6982 }
6983 
SVEStructuredStoreHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr)6984 void Simulator::SVEStructuredStoreHelper(VectorFormat vform,
6985                                          const LogicPRegister& pg,
6986                                          unsigned zt_code,
6987                                          const LogicSVEAddressVector& addr) {
6988   VIXL_ASSERT(zt_code < kNumberOfZRegisters);
6989 
6990   int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
6991   int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
6992   int msize_in_bytes = addr.GetMsizeInBytes();
6993   int reg_count = addr.GetRegCount();
6994 
6995   VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
6996   VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
6997 
6998   unsigned zt_codes[4] = {zt_code,
6999                           (zt_code + 1) % kNumberOfZRegisters,
7000                           (zt_code + 2) % kNumberOfZRegisters,
7001                           (zt_code + 3) % kNumberOfZRegisters};
7002 
7003   LogicVRegister zt[4] = {
7004       ReadVRegister(zt_codes[0]),
7005       ReadVRegister(zt_codes[1]),
7006       ReadVRegister(zt_codes[2]),
7007       ReadVRegister(zt_codes[3]),
7008   };
7009 
7010   // For unpacked forms (e.g. `st1b { z0.h }, ...`, the upper parts of the lanes
7011   // are ignored, so read the source register using the VectorFormat that
7012   // corresponds with the storage format, and multiply the index accordingly.
7013   VectorFormat unpack_vform =
7014       SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
7015   int unpack_shift = esize_in_bytes_log2 - msize_in_bytes_log2;
7016 
7017   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7018     if (!pg.IsActive(vform, i)) continue;
7019 
7020     for (int r = 0; r < reg_count; r++) {
7021       uint64_t element_address = addr.GetElementAddress(i, r);
7022       StoreLane(zt[r], unpack_vform, i << unpack_shift, element_address);
7023     }
7024   }
7025 
7026   if (ShouldTraceWrites()) {
7027     PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7028     if (esize_in_bytes_log2 == msize_in_bytes_log2) {
7029       // Use an FP format where it's likely that we're accessing FP data.
7030       format = GetPrintRegisterFormatTryFP(format);
7031     }
7032     // Stores don't represent a change to the source register's value, so only
7033     // print the relevant part of the value.
7034     format = GetPrintRegPartial(format);
7035 
7036     PrintZStructAccess(zt_code,
7037                        reg_count,
7038                        pg,
7039                        format,
7040                        msize_in_bytes,
7041                        "->",
7042                        addr);
7043   }
7044 }
7045 
SVEStructuredLoadHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr,bool is_signed)7046 void Simulator::SVEStructuredLoadHelper(VectorFormat vform,
7047                                         const LogicPRegister& pg,
7048                                         unsigned zt_code,
7049                                         const LogicSVEAddressVector& addr,
7050                                         bool is_signed) {
7051   int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7052   int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7053   int msize_in_bytes = addr.GetMsizeInBytes();
7054   int reg_count = addr.GetRegCount();
7055 
7056   VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7057   VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7058   VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7059 
7060   unsigned zt_codes[4] = {zt_code,
7061                           (zt_code + 1) % kNumberOfZRegisters,
7062                           (zt_code + 2) % kNumberOfZRegisters,
7063                           (zt_code + 3) % kNumberOfZRegisters};
7064   LogicVRegister zt[4] = {
7065       ReadVRegister(zt_codes[0]),
7066       ReadVRegister(zt_codes[1]),
7067       ReadVRegister(zt_codes[2]),
7068       ReadVRegister(zt_codes[3]),
7069   };
7070 
7071   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7072     for (int r = 0; r < reg_count; r++) {
7073       uint64_t element_address = addr.GetElementAddress(i, r);
7074 
7075       if (!pg.IsActive(vform, i)) {
7076         zt[r].SetUint(vform, i, 0);
7077         continue;
7078       }
7079 
7080       if (is_signed) {
7081         LoadIntToLane(zt[r], vform, msize_in_bytes, i, element_address);
7082       } else {
7083         LoadUintToLane(zt[r], vform, msize_in_bytes, i, element_address);
7084       }
7085     }
7086   }
7087 
7088   if (ShouldTraceVRegs()) {
7089     PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7090     if ((esize_in_bytes_log2 == msize_in_bytes_log2) && !is_signed) {
7091       // Use an FP format where it's likely that we're accessing FP data.
7092       format = GetPrintRegisterFormatTryFP(format);
7093     }
7094     PrintZStructAccess(zt_code,
7095                        reg_count,
7096                        pg,
7097                        format,
7098                        msize_in_bytes,
7099                        "<-",
7100                        addr);
7101   }
7102 }
7103 
brka(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn)7104 LogicPRegister Simulator::brka(LogicPRegister pd,
7105                                const LogicPRegister& pg,
7106                                const LogicPRegister& pn) {
7107   bool break_ = false;
7108   for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7109     if (pg.IsActive(kFormatVnB, i)) {
7110       pd.SetActive(kFormatVnB, i, !break_);
7111       break_ |= pn.IsActive(kFormatVnB, i);
7112     }
7113   }
7114 
7115   return pd;
7116 }
7117 
brkb(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn)7118 LogicPRegister Simulator::brkb(LogicPRegister pd,
7119                                const LogicPRegister& pg,
7120                                const LogicPRegister& pn) {
7121   bool break_ = false;
7122   for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7123     if (pg.IsActive(kFormatVnB, i)) {
7124       break_ |= pn.IsActive(kFormatVnB, i);
7125       pd.SetActive(kFormatVnB, i, !break_);
7126     }
7127   }
7128 
7129   return pd;
7130 }
7131 
brkn(LogicPRegister pdm,const LogicPRegister & pg,const LogicPRegister & pn)7132 LogicPRegister Simulator::brkn(LogicPRegister pdm,
7133                                const LogicPRegister& pg,
7134                                const LogicPRegister& pn) {
7135   if (!IsLastActive(kFormatVnB, pg, pn)) {
7136     pfalse(pdm);
7137   }
7138   return pdm;
7139 }
7140 
brkpa(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn,const LogicPRegister & pm)7141 LogicPRegister Simulator::brkpa(LogicPRegister pd,
7142                                 const LogicPRegister& pg,
7143                                 const LogicPRegister& pn,
7144                                 const LogicPRegister& pm) {
7145   bool last_active = IsLastActive(kFormatVnB, pg, pn);
7146 
7147   for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7148     bool active = false;
7149     if (pg.IsActive(kFormatVnB, i)) {
7150       active = last_active;
7151       last_active = last_active && !pm.IsActive(kFormatVnB, i);
7152     }
7153     pd.SetActive(kFormatVnB, i, active);
7154   }
7155 
7156   return pd;
7157 }
7158 
brkpb(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn,const LogicPRegister & pm)7159 LogicPRegister Simulator::brkpb(LogicPRegister pd,
7160                                 const LogicPRegister& pg,
7161                                 const LogicPRegister& pn,
7162                                 const LogicPRegister& pm) {
7163   bool last_active = IsLastActive(kFormatVnB, pg, pn);
7164 
7165   for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7166     bool active = false;
7167     if (pg.IsActive(kFormatVnB, i)) {
7168       last_active = last_active && !pm.IsActive(kFormatVnB, i);
7169       active = last_active;
7170     }
7171     pd.SetActive(kFormatVnB, i, active);
7172   }
7173 
7174   return pd;
7175 }
7176 
SVEFaultTolerantLoadHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr,SVEFaultTolerantLoadType type,bool is_signed)7177 void Simulator::SVEFaultTolerantLoadHelper(VectorFormat vform,
7178                                            const LogicPRegister& pg,
7179                                            unsigned zt_code,
7180                                            const LogicSVEAddressVector& addr,
7181                                            SVEFaultTolerantLoadType type,
7182                                            bool is_signed) {
7183   int esize_in_bytes = LaneSizeInBytesFromFormat(vform);
7184   int msize_in_bits = addr.GetMsizeInBits();
7185   int msize_in_bytes = addr.GetMsizeInBytes();
7186 
7187   VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7188   VIXL_ASSERT(esize_in_bytes >= msize_in_bytes);
7189   VIXL_ASSERT(addr.GetRegCount() == 1);
7190 
7191   LogicVRegister zt = ReadVRegister(zt_code);
7192   LogicPRegister ffr = ReadFFR();
7193 
7194   // Non-faulting loads are allowed to fail arbitrarily. To stress user
7195   // code, fail a random element in roughly one in eight full-vector loads.
7196   uint32_t rnd = static_cast<uint32_t>(jrand48(rand_state_));
7197   int fake_fault_at_lane = rnd % (LaneCountFromFormat(vform) * 8);
7198 
7199   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7200     uint64_t value = 0;
7201 
7202     if (pg.IsActive(vform, i)) {
7203       uint64_t element_address = addr.GetElementAddress(i, 0);
7204 
7205       if (type == kSVEFirstFaultLoad) {
7206         // First-faulting loads always load the first active element, regardless
7207         // of FFR. The result will be discarded if its FFR lane is inactive, but
7208         // it could still generate a fault.
7209         value = MemReadUint(msize_in_bytes, element_address);
7210         // All subsequent elements have non-fault semantics.
7211         type = kSVENonFaultLoad;
7212 
7213       } else if (ffr.IsActive(vform, i)) {
7214         // Simulation of fault-tolerant loads relies on system calls, and is
7215         // likely to be relatively slow, so we only actually perform the load if
7216         // its FFR lane is active.
7217 
7218         bool can_read = (i < fake_fault_at_lane) &&
7219                         CanReadMemory(element_address, msize_in_bytes);
7220         if (can_read) {
7221           value = MemReadUint(msize_in_bytes, element_address);
7222         } else {
7223           // Propagate the fault to the end of FFR.
7224           for (int j = i; j < LaneCountFromFormat(vform); j++) {
7225             ffr.SetActive(vform, j, false);
7226           }
7227         }
7228       }
7229     }
7230 
7231     // The architecture permits a few possible results for inactive FFR lanes
7232     // (including those caused by a fault in this instruction). We choose to
7233     // leave the register value unchanged (like merging predication) because
7234     // no other input to this instruction can have the same behaviour.
7235     //
7236     // Note that this behaviour takes precedence over pg's zeroing predication.
7237 
7238     if (ffr.IsActive(vform, i)) {
7239       int msb = msize_in_bits - 1;
7240       if (is_signed) {
7241         zt.SetInt(vform, i, ExtractSignedBitfield64(msb, 0, value));
7242       } else {
7243         zt.SetUint(vform, i, ExtractUnsignedBitfield64(msb, 0, value));
7244       }
7245     }
7246   }
7247 
7248   if (ShouldTraceVRegs()) {
7249     PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7250     if ((esize_in_bytes == msize_in_bytes) && !is_signed) {
7251       // Use an FP format where it's likely that we're accessing FP data.
7252       format = GetPrintRegisterFormatTryFP(format);
7253     }
7254     // Log accessed lanes that are active in both pg and ffr. PrintZStructAccess
7255     // expects a single mask, so combine the two predicates.
7256     SimPRegister mask;
7257     SVEPredicateLogicalHelper(AND_p_p_pp_z, mask, pg, ffr);
7258     PrintZStructAccess(zt_code, 1, mask, format, msize_in_bytes, "<-", addr);
7259   }
7260 }
7261 
SVEGatherLoadScalarPlusVectorHelper(const Instruction * instr,VectorFormat vform,SVEOffsetModifier mod)7262 void Simulator::SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr,
7263                                                     VectorFormat vform,
7264                                                     SVEOffsetModifier mod) {
7265   bool is_signed = instr->ExtractBit(14) == 0;
7266   bool is_ff = instr->ExtractBit(13) == 1;
7267   // Note that these instructions don't use the Dtype encoding.
7268   int msize_in_bytes_log2 = instr->ExtractBits(24, 23);
7269   int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
7270   uint64_t base = ReadXRegister(instr->GetRn());
7271   LogicSVEAddressVector addr(base,
7272                              &ReadVRegister(instr->GetRm()),
7273                              vform,
7274                              mod,
7275                              scale);
7276   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
7277   if (is_ff) {
7278     SVEFaultTolerantLoadHelper(vform,
7279                                ReadPRegister(instr->GetPgLow8()),
7280                                instr->GetRt(),
7281                                addr,
7282                                kSVEFirstFaultLoad,
7283                                is_signed);
7284   } else {
7285     SVEStructuredLoadHelper(vform,
7286                             ReadPRegister(instr->GetPgLow8()),
7287                             instr->GetRt(),
7288                             addr,
7289                             is_signed);
7290   }
7291 }
7292 
GetFirstActive(VectorFormat vform,const LogicPRegister & pg) const7293 int Simulator::GetFirstActive(VectorFormat vform,
7294                               const LogicPRegister& pg) const {
7295   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7296     if (pg.IsActive(vform, i)) return i;
7297   }
7298   return -1;
7299 }
7300 
GetLastActive(VectorFormat vform,const LogicPRegister & pg) const7301 int Simulator::GetLastActive(VectorFormat vform,
7302                              const LogicPRegister& pg) const {
7303   for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
7304     if (pg.IsActive(vform, i)) return i;
7305   }
7306   return -1;
7307 }
7308 
CountActiveLanes(VectorFormat vform,const LogicPRegister & pg) const7309 int Simulator::CountActiveLanes(VectorFormat vform,
7310                                 const LogicPRegister& pg) const {
7311   int count = 0;
7312   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7313     count += pg.IsActive(vform, i) ? 1 : 0;
7314   }
7315   return count;
7316 }
7317 
CountActiveAndTrueLanes(VectorFormat vform,const LogicPRegister & pg,const LogicPRegister & pn) const7318 int Simulator::CountActiveAndTrueLanes(VectorFormat vform,
7319                                        const LogicPRegister& pg,
7320                                        const LogicPRegister& pn) const {
7321   int count = 0;
7322   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7323     count += (pg.IsActive(vform, i) && pn.IsActive(vform, i)) ? 1 : 0;
7324   }
7325   return count;
7326 }
7327 
GetPredicateConstraintLaneCount(VectorFormat vform,int pattern) const7328 int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform,
7329                                                int pattern) const {
7330   VIXL_ASSERT(IsSVEFormat(vform));
7331   int all = LaneCountFromFormat(vform);
7332   VIXL_ASSERT(all > 0);
7333 
7334   switch (pattern) {
7335     case SVE_VL1:
7336     case SVE_VL2:
7337     case SVE_VL3:
7338     case SVE_VL4:
7339     case SVE_VL5:
7340     case SVE_VL6:
7341     case SVE_VL7:
7342     case SVE_VL8:
7343       // VL1-VL8 are encoded directly.
7344       VIXL_STATIC_ASSERT(SVE_VL1 == 1);
7345       VIXL_STATIC_ASSERT(SVE_VL8 == 8);
7346       return (pattern <= all) ? pattern : 0;
7347     case SVE_VL16:
7348     case SVE_VL32:
7349     case SVE_VL64:
7350     case SVE_VL128:
7351     case SVE_VL256: {
7352       // VL16-VL256 are encoded as log2(N) + c.
7353       int min = 16 << (pattern - SVE_VL16);
7354       return (min <= all) ? min : 0;
7355     }
7356     // Special cases.
7357     case SVE_POW2:
7358       return 1 << HighestSetBitPosition(all);
7359     case SVE_MUL4:
7360       return all - (all % 4);
7361     case SVE_MUL3:
7362       return all - (all % 3);
7363     case SVE_ALL:
7364       return all;
7365   }
7366   // Unnamed cases archicturally return 0.
7367   return 0;
7368 }
7369 
GetStructAddress(int lane) const7370 uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const {
7371   if (IsContiguous()) {
7372     return base_ + (lane * GetRegCount()) * GetMsizeInBytes();
7373   }
7374 
7375   VIXL_ASSERT(IsScatterGather());
7376   VIXL_ASSERT(vector_ != NULL);
7377 
7378   // For scatter-gather accesses, we need to extract the offset from vector_,
7379   // and apply modifiers.
7380 
7381   uint64_t offset = 0;
7382   switch (vector_form_) {
7383     case kFormatVnS:
7384       offset = vector_->GetLane<uint32_t>(lane);
7385       break;
7386     case kFormatVnD:
7387       offset = vector_->GetLane<uint64_t>(lane);
7388       break;
7389     default:
7390       VIXL_UNIMPLEMENTED();
7391       break;
7392   }
7393 
7394   switch (vector_mod_) {
7395     case SVE_MUL_VL:
7396       VIXL_UNIMPLEMENTED();
7397       break;
7398     case SVE_LSL:
7399       // We apply the shift below. There's nothing to do here.
7400       break;
7401     case NO_SVE_OFFSET_MODIFIER:
7402       VIXL_ASSERT(vector_shift_ == 0);
7403       break;
7404     case SVE_UXTW:
7405       offset = ExtractUnsignedBitfield64(kWRegSize - 1, 0, offset);
7406       break;
7407     case SVE_SXTW:
7408       offset = ExtractSignedBitfield64(kWRegSize - 1, 0, offset);
7409       break;
7410   }
7411 
7412   return base_ + (offset << vector_shift_);
7413 }
7414 
7415 
7416 }  // namespace aarch64
7417 }  // namespace vixl
7418 
7419 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
7420