/*
 * Copyright (C) 2020 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm64.h"

#include "arch/arm64/instruction_set_features_arm64.h"
#include "base/bit_utils_iterator.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

namespace art {
namespace arm64 {

using helpers::DRegisterFrom;
using helpers::HeapOperand;
using helpers::InputRegisterAt;
using helpers::Int64FromLocation;
using helpers::LocationFrom;
using helpers::OutputRegister;
using helpers::QRegisterFrom;
using helpers::StackOperandFrom;
using helpers::SveStackOperandFrom;
using helpers::VRegisterFrom;
using helpers::XRegisterFrom;
using helpers::ZRegisterFrom;

#define __ GetVIXLAssembler()->
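// The `__` shorthand above makes the emission code below read like assembly:
// `__ Dup(...)` expands to `GetVIXLAssembler()->Dup(...)`.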

// Returns whether the value of the constant can be directly encoded into the instruction as
// an immediate.
static bool SVECanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
  if (instr->IsVecReplicateScalar()) {
    if (constant->IsLongConstant()) {
      return false;
    } else if (constant->IsFloatConstant()) {
      return vixl::aarch64::Assembler::IsImmFP32(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsDoubleConstant()) {
      return vixl::aarch64::Assembler::IsImmFP64(constant->AsDoubleConstant()->GetValue());
    }
    // TODO: Make use of shift part of DUP instruction.
    int64_t value = CodeGenerator::GetInt64ValueOf(constant);
    return IsInt<8>(value);
  }

  return false;
}
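
// For example, replicating the int constant 17 can use the immediate form of
// DUP, since IsInt<8>(17) holds, whereas 1000 is out of range and has to be
// materialized in a register first.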

// Returns
//  - a constant location if 'constant' is an actual constant and its value
//    can be encoded as an immediate in the instruction,
//  - a register location otherwise.
inline Location SVEEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) {
  if (constant->IsConstant() &&
      SVECanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
    return Location::ConstantLocation(constant->AsConstant());
  }

  return Location::RequiresRegister();
}

void InstructionCodeGeneratorARM64Sve::ValidateVectorLength(HVecOperation* instr) const {
  DCHECK_EQ(DataType::Size(instr->GetPackedType()) * instr->GetVectorLength(),
            codegen_->GetSIMDRegisterWidth());
}
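
// For example, with a 16-byte SIMD register width a kInt32 vector must have
// 4 lanes and a kInt16 vector 8 lanes for the check above to pass.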

void LocationsBuilderARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  HInstruction* input = instruction->InputAt(0);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, SVEEncodableConstantOrRegister(input, instruction));
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      if (input->IsConstant() &&
          SVECanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
        locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(0, Location::RequiresFpuRegister());
        locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  Location src_loc = locations->InAt(0);
  const ZRegister dst = ZRegisterFrom(locations->Out());
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnB(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnB(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnH(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnH(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt32:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnS(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnS(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt64:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnD(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnD(), XRegisterFrom(src_loc));
      }
      break;
    case DataType::Type::kFloat32:
      if (src_loc.IsConstant()) {
        __ Fdup(dst.VnS(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
      } else {
        __ Dup(dst.VnS(), ZRegisterFrom(src_loc).VnS(), 0);
      }
      break;
    case DataType::Type::kFloat64:
      if (src_loc.IsConstant()) {
        __ Fdup(dst.VnD(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
      } else {
        __ Dup(dst.VnD(), ZRegisterFrom(src_loc).VnD(), 0);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Sve::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const VRegister src = VRegisterFrom(locations->InAt(0));
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      __ Umov(OutputRegister(instruction), src.V4S(), 0);
      break;
    case DataType::Type::kInt64:
      __ Umov(OutputRegister(instruction), src.V2D(), 0);
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
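      // Boolean-not (see VisitVecNot below) writes its destination before
      // reading the source, so the output must not share a register with the
      // input.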
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(),
                        instruction->IsVecNot() ? Location::kOutputOverlap
                                                : Location::kNoOutputOverlap);
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecReduce(HVecReduce* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecReduce(HVecReduce* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const VRegister dst = DRegisterFrom(locations->Out());
  const PRegister p_reg = LoopPReg();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          __ Saddv(dst.S(), p_reg, src.VnS());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD instruction";
          UNREACHABLE();
      }
      break;
    case DataType::Type::kInt64:
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
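          // UADDV always produces a 64-bit scalar, and for 64-bit lanes a
          // signed and an unsigned sum agree modulo 2^64.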
          __ Uaddv(dst.D(), p_reg, src.VnD());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD instruction";
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecCnv(HVecCnv* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecCnv(HVecCnv* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  DataType::Type from = instruction->GetInputType();
  DataType::Type to = instruction->GetResultType();
  ValidateVectorLength(instruction);
  if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
    __ Scvtf(dst.VnS(), p_reg, src.VnS());
  } else {
    LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
  }
}

void LocationsBuilderARM64Sve::VisitVecNeg(HVecNeg* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecNeg(HVecNeg* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Neg(dst.VnB(), p_reg, src.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Neg(dst.VnH(), p_reg, src.VnH());
      break;
    case DataType::Type::kInt32:
      __ Neg(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kInt64:
      __ Neg(dst.VnD(), p_reg, src.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fneg(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fneg(dst.VnD(), p_reg, src.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecAbs(HVecAbs* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecAbs(HVecAbs* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt8:
      __ Abs(dst.VnB(), p_reg, src.VnB());
      break;
    case DataType::Type::kInt16:
      __ Abs(dst.VnH(), p_reg, src.VnH());
      break;
    case DataType::Type::kInt32:
      __ Abs(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kInt64:
      __ Abs(dst.VnD(), p_reg, src.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fabs(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fabs(dst.VnD(), p_reg, src.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecNot(HVecNot* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecNot(HVecNot* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:  // special case boolean-not
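      // Booleans are stored as bytes holding 0 or 1, so logical-not is
      // computed as `1 ^ src` rather than as a bitwise inversion.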
      __ Dup(dst.VnB(), 1);
      __ Eor(dst.VnB(), p_reg, dst.VnB(), src.VnB());
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Not(dst.VnB(), p_reg, src.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Not(dst.VnH(), p_reg, src.VnH());
      break;
    case DataType::Type::kInt32:
      __ Not(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kInt64:
      __ Not(dst.VnD(), p_reg, src.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector binary operations.
static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
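      // SVE predicated arithmetic is destructive (Zdn form); tying the output
      // to the first input typically lets the macro-assembler avoid an extra
      // MOVPRFX.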
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecAdd(HVecAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecAdd(HVecAdd* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Add(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Add(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
      __ Add(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
      __ Add(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fadd(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS(), StrictNaNPropagation);
      break;
    case DataType::Type::kFloat64:
      __ Fadd(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD(), StrictNaNPropagation);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecSub(HVecSub* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecSub(HVecSub* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Sub(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Sub(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
      __ Sub(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
      __ Sub(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fsub(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fsub(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecMul(HVecMul* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecMul(HVecMul* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Mul(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Mul(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
      __ Mul(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
      __ Mul(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fmul(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS(), StrictNaNPropagation);
      break;
    case DataType::Type::kFloat64:
      __ Fmul(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD(), StrictNaNPropagation);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecDiv(HVecDiv* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecDiv(HVecDiv* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);

  // Note: VIXL guarantees StrictNaNPropagation for Fdiv.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kFloat32:
      __ Fdiv(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fdiv(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecMin(HVecMin* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecMin(HVecMin* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecMax(HVecMax* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecMax(HVecMax* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecAnd(HVecAnd* instruction) {
  // TODO: Allow constants supported by BIC (vector, immediate).
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecAnd(HVecAnd* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ And(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ And(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ And(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ And(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecAndNot(HVecAndNot* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}

void InstructionCodeGeneratorARM64Sve::VisitVecAndNot(HVecAndNot* instruction) {
  // TODO: Use BIC (vector, register).
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}

void LocationsBuilderARM64Sve::VisitVecOr(HVecOr* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecOr(HVecOr* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Orr(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Orr(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Orr(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Orr(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecXor(HVecXor* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecXor(HVecXor* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Eor(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Eor(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Eor(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Eor(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector shift operations.
static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecShl(HVecShl* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecShl(HVecShl* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Lsl(dst.VnB(), p_reg, lhs.VnB(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Lsl(dst.VnH(), p_reg, lhs.VnH(), value);
      break;
    case DataType::Type::kInt32:
      __ Lsl(dst.VnS(), p_reg, lhs.VnS(), value);
      break;
    case DataType::Type::kInt64:
      __ Lsl(dst.VnD(), p_reg, lhs.VnD(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecShr(HVecShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecShr(HVecShr* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Asr(dst.VnB(), p_reg, lhs.VnB(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Asr(dst.VnH(), p_reg, lhs.VnH(), value);
      break;
    case DataType::Type::kInt32:
      __ Asr(dst.VnS(), p_reg, lhs.VnS(), value);
      break;
    case DataType::Type::kInt64:
      __ Asr(dst.VnD(), p_reg, lhs.VnD(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecUShr(HVecUShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecUShr(HVecUShr* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Lsr(dst.VnB(), p_reg, lhs.VnB(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Lsr(dst.VnH(), p_reg, lhs.VnH(), value);
      break;
    case DataType::Type::kInt32:
      __ Lsr(dst.VnS(), p_reg, lhs.VnS(), value);
      break;
    case DataType::Type::kInt64:
      __ Lsr(dst.VnD(), p_reg, lhs.VnD(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);

  DCHECK_EQ(2u, instruction->InputCount());  // only one input currently implemented + predicate.

  HInstruction* input = instruction->InputAt(0);
  bool is_zero = IsZeroBitPattern(input);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Sve::VisitVecSetScalars(HVecSetScalars* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister z_dst = ZRegisterFrom(locations->Out());

  DCHECK_EQ(2u, instruction->InputCount());  // only one input currently implemented + predicate.

  // Zero out all other elements first.
  __ Dup(z_dst.VnB(), 0);

  // Shorthand for any type of zero.
  if (IsZeroBitPattern(instruction->InputAt(0))) {
    return;
  }

  const VRegister dst = VRegisterFrom(locations->Out());
  ValidateVectorLength(instruction);

  // Set required elements.
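  // A NEON V register is the 128-bit low view of the corresponding SVE Z
  // register, so the lane moves below write element 0 while the remaining
  // lanes keep the zero written above.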
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt32:
      __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt64:
      __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector accumulations.
static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetInAt(2, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
}

// Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
// 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
// However, the vector MultiplyAccumulate instruction is not affected.
void InstructionCodeGeneratorARM64Sve::VisitVecMultiplyAccumulate(
    HVecMultiplyAccumulate* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister acc = ZRegisterFrom(locations->InAt(0));
  const ZRegister left = ZRegisterFrom(locations->InAt(1));
  const ZRegister right = ZRegisterFrom(locations->InAt(2));
  const PRegisterM p_reg = LoopPReg().Merging();

  DCHECK(locations->InAt(0).Equals(locations->Out()));
  ValidateVectorLength(instruction);

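  // VIXL's predicated Mla/Mls take (zd, pg, za, zn, zm) and compute
  // zd = za +/- zn * zm for each active lane; `acc` serves as both the
  // addend and the destination here, and all operands must use the same
  // lane size.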
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.VnB(), p_reg, acc.VnB(), left.VnB(), right.VnB());
      } else {
        __ Mls(acc.VnB(), p_reg, acc.VnB(), left.VnB(), right.VnB());
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.VnH(), p_reg, acc.VnH(), left.VnH(), right.VnH());
      } else {
        __ Mls(acc.VnH(), p_reg, acc.VnH(), left.VnH(), right.VnH());
      }
      break;
    case DataType::Type::kInt32:
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.VnS(), p_reg, acc.VnS(), left.VnS(), right.VnS());
      } else {
        __ Mls(acc.VnS(), p_reg, acc.VnS(), left.VnS(), right.VnS());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecDotProd(HVecDotProd* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  DCHECK(instruction->GetPackedType() == DataType::Type::kInt32);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetInAt(2, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());

  locations->AddTemp(Location::RequiresFpuRegister());
}

void InstructionCodeGeneratorARM64Sve::VisitVecDotProd(HVecDotProd* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  const ZRegister acc = ZRegisterFrom(locations->InAt(0));
  const ZRegister left = ZRegisterFrom(locations->InAt(1));
  const ZRegister right = ZRegisterFrom(locations->InAt(2));
  const PRegisterM p_reg = LoopPReg().Merging();
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32);
  ValidateVectorLength(instruction);

  size_t inputs_data_size = DataType::Size(a->GetPackedType());
  switch (inputs_data_size) {
    case 1u: {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      const ZRegister tmp0 = temps.AcquireZ();
      const ZRegister tmp1 = ZRegisterFrom(locations->GetTemp(0));

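      // UDOT/SDOT have no predicated form, so the inactive lanes of both
      // operands are replaced with zero via SEL first; their products then
      // contribute nothing to the accumulator.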
      __ Dup(tmp1.VnB(), 0u);
      __ Sel(tmp0.VnB(), p_reg, left.VnB(), tmp1.VnB());
      __ Sel(tmp1.VnB(), p_reg, right.VnB(), tmp1.VnB());
      if (instruction->IsZeroExtending()) {
        __ Udot(acc.VnS(), acc.VnS(), tmp0.VnB(), tmp1.VnB());
      } else {
        __ Sdot(acc.VnS(), acc.VnS(), tmp0.VnB(), tmp1.VnB());
      }
      break;
    }
    default:
      LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size;
  }
}

// Helper to set up locations for vector memory operations.
static void CreateVecMemLocations(ArenaAllocator* allocator,
                                  HVecMemoryOperation* instruction,
                                  bool is_load) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
      if (is_load) {
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(2, Location::RequiresFpuRegister());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecLoad(HVecLoad* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
}

void InstructionCodeGeneratorARM64Sve::VisitVecLoad(HVecLoad* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  const ZRegister reg = ZRegisterFrom(locations->Out());
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;
  const PRegisterZ p_reg = LoopPReg().Zeroing();
  ValidateVectorLength(instruction);

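  // An LD1* with a zeroing governing predicate loads the active lanes from
  // memory and sets the inactive lanes of the destination to zero.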
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
    case DataType::Type::kUint16:
      __ Ld1h(reg.VnH(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Ld1b(reg.VnB(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Ld1w(reg.VnS(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Ld1d(reg.VnD(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecStore(HVecStore* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
}

void InstructionCodeGeneratorARM64Sve::VisitVecStore(HVecStore* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  const ZRegister reg = ZRegisterFrom(locations->InAt(2));
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;
  const PRegisterZ p_reg = LoopPReg().Zeroing();
  ValidateVectorLength(instruction);

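  // An ST1* writes only the active lanes to memory; memory covered by
  // inactive lanes is left untouched.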
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ St1b(reg.VnB(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ St1h(reg.VnH(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ St1w(reg.VnS(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ St1d(reg.VnD(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  DCHECK(instruction->InputAt(0)->IsIntConstant());
  locations->SetInAt(0, Location::NoLocation());
  locations->SetOut(Location::NoLocation());
}

void InstructionCodeGeneratorARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) {
  // Instruction is not predicated, see nodes_vector.h.
  DCHECK(!instruction->IsPredicated());
  const PRegister p_reg = LoopPReg();

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Ptrue(p_reg.VnB(), vixl::aarch64::SVE_ALL);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Ptrue(p_reg.VnH(), vixl::aarch64::SVE_ALL);
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Ptrue(p_reg.VnS(), vixl::aarch64::SVE_ALL);
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Ptrue(p_reg.VnD(), vixl::aarch64::SVE_ALL);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  // The instruction doesn't really need a core register as out location; this is a hack
  // to work around the absence of support for vector predicates in register allocation.
  //
  // Semantically, the out location of this instruction and the predicate input locations of
  // its users should be a fixed predicate register (similar to
  // Location::RegisterLocation(int reg)). But the register allocator (RA) doesn't support
  // SIMD regs (e.g. predicate), so LoopPReg() is used explicitly without exposing it
  // to the RA.
  //
  // To make the RA happy, Location::NoLocation() was used for all the vector instructions'
  // predicate inputs; but for the PredSetOperations (e.g. VecPredWhile) Location::NoLocation()
  // can't be used without changes to the RA - "ssa_liveness_analysis.cc] Check failed:
  // input->IsEmittedAtUseSite()" would fire.
  //
  // Using a core register as a hack is the easiest way to tackle this problem. The RA will
  // block one core register for the loop without actually using it; this should not be
  // a performance issue as a SIMD loop operates mainly on SIMD registers.
  //
  // TODO: Support SIMD types in register allocator.
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) {
  // Instruction is not predicated, see nodes_vector.h.
  DCHECK(!instruction->IsPredicated());
  // The current implementation of predicated loop execution only supports the kLO condition.
  DCHECK(instruction->GetCondKind() == HVecPredWhile::CondKind::kLO);
  Register left = InputRegisterAt(instruction, 0);
  Register right = InputRegisterAt(instruction, 1);

  DCHECK_EQ(codegen_->GetSIMDRegisterWidth() % instruction->GetVectorLength(), 0u);

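  // The ratio below selects the predicate lane size: e.g. with a 16-byte
  // SIMD width, an Int32 loop has a vector length of 4, so the ratio is 4
  // and WHILELO is emitted with S-sized lanes.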
  switch (codegen_->GetSIMDRegisterWidth() / instruction->GetVectorLength()) {
    case 1u:
      __ Whilelo(LoopPReg().VnB(), left, right);
      break;
    case 2u:
      __ Whilelo(LoopPReg().VnH(), left, right);
      break;
    case 4u:
      __ Whilelo(LoopPReg().VnS(), left, right);
      break;
    case 8u:
      __ Whilelo(LoopPReg().VnD(), left, right);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecPredCondition(HVecPredCondition* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  locations->SetInAt(0, Location::NoLocation());
  // Result of the operation - a boolean value in a core register.
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorARM64Sve::VisitVecPredCondition(HVecPredCondition* instruction) {
  // Instruction is not predicated, see nodes_vector.h.
  DCHECK(!instruction->IsPredicated());
  Register reg = OutputRegister(instruction);
  // Currently VecPredCondition is only used as part of vectorized loop check condition
  // evaluation.
  DCHECK(instruction->GetPCondKind() == HVecPredCondition::PCondKind::kNFirst);
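  // The preceding WHILELO sets the NZCV flags; the SVE "not first" (NFRST)
  // condition is an alias of PL (N clear), hence the Cset below.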
  __ Cset(reg, pl);
}

Location InstructionCodeGeneratorARM64Sve::AllocateSIMDScratchLocation(
    vixl::aarch64::UseScratchRegisterScope* scope) {
  return LocationFrom(scope->AcquireZ());
}

void InstructionCodeGeneratorARM64Sve::FreeSIMDScratchLocation(Location loc,
    vixl::aarch64::UseScratchRegisterScope* scope) {
  scope->Release(ZRegisterFrom(loc));
}

void InstructionCodeGeneratorARM64Sve::LoadSIMDRegFromStack(Location destination,
                                                            Location source) {
  __ Ldr(ZRegisterFrom(destination), SveStackOperandFrom(source));
}

void InstructionCodeGeneratorARM64Sve::MoveSIMDRegToSIMDReg(Location destination,
                                                            Location source) {
  __ Mov(ZRegisterFrom(destination), ZRegisterFrom(source));
}

void InstructionCodeGeneratorARM64Sve::MoveToSIMDStackSlot(Location destination,
                                                           Location source) {
  DCHECK(destination.IsSIMDStackSlot());

  if (source.IsFpuRegister()) {
    __ Str(ZRegisterFrom(source), SveStackOperandFrom(destination));
  } else {
    DCHECK(source.IsSIMDStackSlot());
    UseScratchRegisterScope temps(GetVIXLAssembler());
    if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
      // Very rare situation, only when there are cycles in ParallelMoveResolver graph.
      const Register temp = temps.AcquireX();
      DCHECK_EQ(codegen_->GetSIMDRegisterWidth() % kArm64WordSize, 0u);
      // Emit a number of LDR/STR (XRegister, 64-bit) to cover the whole SIMD register size
      // when copying a stack slot.
      for (size_t offset = 0, e = codegen_->GetSIMDRegisterWidth();
           offset < e;
           offset += kArm64WordSize) {
        __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + offset));
        __ Str(temp, MemOperand(sp, destination.GetStackIndex() + offset));
      }
    } else {
      const ZRegister temp = temps.AcquireZ();
      __ Ldr(temp, SveStackOperandFrom(source));
      __ Str(temp, SveStackOperandFrom(destination));
    }
  }
}

template <bool is_save>
void SaveRestoreLiveRegistersHelperSveImpl(CodeGeneratorARM64* codegen,
                                           LocationSummary* locations,
                                           int64_t spill_offset) {
  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
  DCHECK(helpers::ArtVixlRegCodeCoherentForRegSet(core_spills,
                                                  codegen->GetNumberOfCoreRegisters(),
                                                  fp_spills,
                                                  codegen->GetNumberOfFloatingPointRegisters()));
  MacroAssembler* masm = codegen->GetVIXLAssembler();
  Register base = masm->StackPointer();

  CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
  int64_t core_spill_size = core_list.GetTotalSizeInBytes();
  int64_t fp_spill_offset = spill_offset + core_spill_size;

  if (codegen->GetGraph()->HasSIMD()) {
    if (is_save) {
      masm->StoreCPURegList(core_list, MemOperand(base, spill_offset));
    } else {
      masm->LoadCPURegList(core_list, MemOperand(base, spill_offset));
    }
    codegen->GetAssembler()->SaveRestoreZRegisterList<is_save>(fp_spills, fp_spill_offset);
    return;
  }

  // Case when we only need to restore D-registers.
  DCHECK(!codegen->GetGraph()->HasSIMD());
  DCHECK_LE(codegen->GetSlowPathFPWidth(), kDRegSizeInBytes);
  CPURegList fp_list = CPURegList(CPURegister::kVRegister, kDRegSize, fp_spills);
  if (is_save) {
    masm->StoreCPURegList(core_list, MemOperand(base, spill_offset));
    masm->StoreCPURegList(fp_list, MemOperand(base, fp_spill_offset));
  } else {
    masm->LoadCPURegList(core_list, MemOperand(base, spill_offset));
    masm->LoadCPURegList(fp_list, MemOperand(base, fp_spill_offset));
  }
}

void InstructionCodeGeneratorARM64Sve::SaveLiveRegistersHelper(LocationSummary* locations,
                                                               int64_t spill_offset) {
  SaveRestoreLiveRegistersHelperSveImpl</* is_save= */ true>(codegen_, locations, spill_offset);
}

void InstructionCodeGeneratorARM64Sve::RestoreLiveRegistersHelper(LocationSummary* locations,
                                                                  int64_t spill_offset) {
  SaveRestoreLiveRegistersHelperSveImpl</* is_save= */ false>(codegen_, locations, spill_offset);
}

#undef __

}  // namespace arm64
}  // namespace art