1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_arm64.h"
18 
19 #include "arch/arm64/instruction_set_features_arm64.h"
20 #include "base/bit_utils_iterator.h"
21 #include "mirror/array-inl.h"
22 #include "mirror/string.h"
23 
24 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
25 
26 namespace art HIDDEN {
27 namespace arm64 {
28 
29 using helpers::DRegisterFrom;
30 using helpers::HeapOperand;
31 using helpers::InputRegisterAt;
32 using helpers::Int64FromLocation;
33 using helpers::LocationFrom;
34 using helpers::OutputRegister;
35 using helpers::QRegisterFrom;
36 using helpers::StackOperandFrom;
37 using helpers::VRegisterFrom;
38 using helpers::XRegisterFrom;
39 
40 #define __ GetVIXLAssembler()->
41 
42 // Returns whether the value of the constant can be directly encoded into the instruction as
43 // immediate.
NEONCanEncodeConstantAsImmediate(HConstant * constant,HInstruction * instr)44 inline bool NEONCanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
45   // TODO: Improve this when IsSIMDConstantEncodable method is implemented in VIXL.
46   if (instr->IsVecReplicateScalar()) {
47     if (constant->IsLongConstant()) {
48       return false;
49     } else if (constant->IsFloatConstant()) {
50       return vixl::aarch64::Assembler::IsImmFP32(constant->AsFloatConstant()->GetValue());
51     } else if (constant->IsDoubleConstant()) {
52       return vixl::aarch64::Assembler::IsImmFP64(constant->AsDoubleConstant()->GetValue());
53     }
54     int64_t value = CodeGenerator::GetInt64ValueOf(constant);
55     return IsUint<8>(value);
56   }
57   return false;
58 }
59 
60 // Returns
61 //  - constant location - if 'constant' is an actual constant and its value can be
62 //    encoded into the instruction.
63 //  - register location otherwise.
NEONEncodableConstantOrRegister(HInstruction * constant,HInstruction * instr)64 inline Location NEONEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) {
65   if (constant->IsConstant() && NEONCanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
66     return Location::ConstantLocation(constant);
67   }
68 
69   return Location::RequiresRegister();
70 }
71 
72 // Returns whether dot product instructions should be emitted.
ShouldEmitDotProductInstructions(const CodeGeneratorARM64 * codegen_)73 static bool ShouldEmitDotProductInstructions(const CodeGeneratorARM64* codegen_) {
74   return codegen_->GetInstructionSetFeatures().HasDotProd();
75 }
76 
// Sets up register/constant location constraints for a scalar-to-vector broadcast.
void LocationsBuilderARM64Neon::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  HInstruction* input = instruction->InputAt(0);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      // Integral input: either an encodable immediate or a core register.
      locations->SetInAt(0, NEONEncodableConstantOrRegister(input, instruction));
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      if (input->IsConstant() &&
          NEONCanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
        // FP constant encodable as Fmov (vector, immediate): keep it as a constant.
        locations->SetInAt(0, Location::ConstantLocation(input));
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        // Non-encodable FP value is broadcast from an FP register; output may
        // share the input register (no overlap hazard for Dup).
        locations->SetInAt(0, Location::RequiresFpuRegister());
        locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
107 
// Broadcasts the scalar input into every lane of the 128-bit destination.
// Encodable constants are materialized with Movi/Fmov (vector, immediate);
// register inputs are broadcast with Dup.
void InstructionCodeGeneratorARM64Neon::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  Location src_loc = locations->InAt(0);
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());  // 16 x 8-bit lanes
      if (src_loc.IsConstant()) {
        __ Movi(dst.V16B(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());  // 8 x 16-bit lanes
      if (src_loc.IsConstant()) {
        __ Movi(dst.V8H(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());  // 4 x 32-bit lanes
      if (src_loc.IsConstant()) {
        __ Movi(dst.V4S(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());  // 2 x 64-bit lanes
      if (src_loc.IsConstant()) {
        __ Movi(dst.V2D(), Int64FromLocation(src_loc));
      } else {
        // 64-bit source comes from an X (core) register.
        __ Dup(dst.V2D(), XRegisterFrom(src_loc));
      }
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Fmov(dst.V4S(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
      } else {
        // Broadcast lane 0 of the source FP register.
        __ Dup(dst.V4S(), VRegisterFrom(src_loc).V4S(), 0);
      }
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Fmov(dst.V2D(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
      } else {
        __ Dup(dst.V2D(), VRegisterFrom(src_loc).V2D(), 0);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
169 
// Sets up location constraints for extracting lane 0 of a vector as a scalar.
void LocationsBuilderARM64Neon::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      // Integral extract moves the lane into a core register.
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      // FP extract is a no-op: the scalar view aliases lane 0 of the vector
      // register, so the output must be the same register as the input.
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
193 
// Extracts lane 0 of the vector input as a scalar result.
void InstructionCodeGeneratorARM64Neon::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      // Umov copies lane 0 into the (32-bit) output core register.
      __ Umov(OutputRegister(instruction), src.V4S(), 0);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Umov(OutputRegister(instruction), src.V2D(), 0);
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 4u);
      // The FP scalar aliases lane 0; locations guaranteed out == in.
      DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
217 
// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      // Boolean-not emits two instructions (Movi then Eor) and would clobber
      // its own input if out == in, so require output overlap in that case.
      locations->SetOut(Location::RequiresFpuRegister(),
                        instruction->IsVecNot() ? Location::kOutputOverlap
                                                : Location::kNoOutputOverlap);
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      // Single-instruction unary ops: output may safely reuse the input register.
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
244 
VisitVecReduce(HVecReduce * instruction)245 void LocationsBuilderARM64Neon::VisitVecReduce(HVecReduce* instruction) {
246   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
247 }
248 
// Reduces all lanes of the input vector to a single scalar lane in the output.
void InstructionCodeGeneratorARM64Neon::VisitVecReduce(HVecReduce* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          // Across-lanes add into the scalar S register.
          __ Addv(dst.S(), src.V4S());
          break;
        case HVecReduce::kMin:
          __ Sminv(dst.S(), src.V4S());
          break;
        case HVecReduce::kMax:
          __ Smaxv(dst.S(), src.V4S());
          break;
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          // No 64-bit ADDV; pairwise add of the two D lanes instead.
          __ Addp(dst.D(), src.V2D());
          break;
        default:
          // No across-lanes min/max for 64-bit lanes.
          LOG(FATAL) << "Unsupported SIMD min/max";
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
284 
VisitVecCnv(HVecCnv * instruction)285 void LocationsBuilderARM64Neon::VisitVecCnv(HVecCnv* instruction) {
286   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
287 }
288 
VisitVecCnv(HVecCnv * instruction)289 void InstructionCodeGeneratorARM64Neon::VisitVecCnv(HVecCnv* instruction) {
290   LocationSummary* locations = instruction->GetLocations();
291   VRegister src = VRegisterFrom(locations->InAt(0));
292   VRegister dst = VRegisterFrom(locations->Out());
293   DataType::Type from = instruction->GetInputType();
294   DataType::Type to = instruction->GetResultType();
295   if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
296     DCHECK_EQ(4u, instruction->GetVectorLength());
297     __ Scvtf(dst.V4S(), src.V4S());
298   } else {
299     LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
300   }
301 }
302 
VisitVecNeg(HVecNeg * instruction)303 void LocationsBuilderARM64Neon::VisitVecNeg(HVecNeg* instruction) {
304   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
305 }
306 
// Lane-wise negation: Neg for integral lanes, Fneg for floating-point lanes.
void InstructionCodeGeneratorARM64Neon::VisitVecNeg(HVecNeg* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Neg(dst.V16B(), src.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Neg(dst.V8H(), src.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Neg(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Neg(dst.V2D(), src.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fneg(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fneg(dst.V2D(), src.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
343 
VisitVecAbs(HVecAbs * instruction)344 void LocationsBuilderARM64Neon::VisitVecAbs(HVecAbs* instruction) {
345   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
346 }
347 
// Lane-wise absolute value: Abs for signed integral lanes, Fabs for FP lanes.
// Note: only signed integral types appear; abs is meaningless for unsigned.
void InstructionCodeGeneratorARM64Neon::VisitVecAbs(HVecAbs* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Abs(dst.V16B(), src.V16B());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Abs(dst.V8H(), src.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Abs(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Abs(dst.V2D(), src.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fabs(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fabs(dst.V2D(), src.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
382 
VisitVecNot(HVecNot * instruction)383 void LocationsBuilderARM64Neon::VisitVecNot(HVecNot* instruction) {
384   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
385 }
386 
// Lane-wise logical/bitwise not.
void InstructionCodeGeneratorARM64Neon::VisitVecNot(HVecNot* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:  // special case boolean-not
      DCHECK_EQ(16u, instruction->GetVectorLength());
      // Booleans are 0/1 per byte: x ^ 1 flips them (bitwise Not would yield
      // 0xFE/0xFF). Two instructions, hence the output-overlap requirement.
      __ Movi(dst.V16B(), 1);
      __ Eor(dst.V16B(), dst.V16B(), src.V16B());
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      __ Not(dst.V16B(), src.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
410 
// Helper to set up locations for vector binary operations.
static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      // Both operands and the result live in SIMD registers; single-instruction
      // ops, so the output may reuse an input register.
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
433 
VisitVecAdd(HVecAdd * instruction)434 void LocationsBuilderARM64Neon::VisitVecAdd(HVecAdd* instruction) {
435   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
436 }
437 
// Lane-wise addition: Add for integral lanes, Fadd for floating-point lanes.
void InstructionCodeGeneratorARM64Neon::VisitVecAdd(HVecAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Add(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Add(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Add(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Add(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fadd(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
475 
VisitVecSaturationAdd(HVecSaturationAdd * instruction)476 void LocationsBuilderARM64Neon::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
477   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
478 }
479 
// Lane-wise saturating addition: Uqadd for unsigned lanes, Sqadd for signed.
// Only 8- and 16-bit lane types are supported.
void InstructionCodeGeneratorARM64Neon::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Uqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Uqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
507 
VisitVecHalvingAdd(HVecHalvingAdd * instruction)508 void LocationsBuilderARM64Neon::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
509   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
510 }
511 
// Lane-wise halving addition, (a + b) >> 1 without intermediate overflow.
// Rounded variants (U/Srhadd) add 1 before the shift; U/Shadd truncate.
void InstructionCodeGeneratorARM64Neon::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
          : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
          : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
          : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
          : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
547 
VisitVecSub(HVecSub * instruction)548 void LocationsBuilderARM64Neon::VisitVecSub(HVecSub* instruction) {
549   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
550 }
551 
// Lane-wise subtraction: Sub for integral lanes, Fsub for floating-point lanes.
void InstructionCodeGeneratorARM64Neon::VisitVecSub(HVecSub* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sub(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sub(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Sub(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Sub(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fsub(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
589 
VisitVecSaturationSub(HVecSaturationSub * instruction)590 void LocationsBuilderARM64Neon::VisitVecSaturationSub(HVecSaturationSub* instruction) {
591   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
592 }
593 
// Lane-wise saturating subtraction: Uqsub for unsigned lanes, Sqsub for signed.
// Only 8- and 16-bit lane types are supported.
void InstructionCodeGeneratorARM64Neon::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Uqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Uqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
621 
VisitVecMul(HVecMul * instruction)622 void LocationsBuilderARM64Neon::VisitVecMul(HVecMul* instruction) {
623   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
624 }
625 
// Lane-wise multiplication: Mul for integral lanes, Fmul for FP lanes.
// Note: no kInt64 case — NEON has no 64-bit lane integer multiply.
void InstructionCodeGeneratorARM64Neon::VisitVecMul(HVecMul* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Mul(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Mul(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Mul(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmul(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
659 
VisitVecDiv(HVecDiv * instruction)660 void LocationsBuilderARM64Neon::VisitVecDiv(HVecDiv* instruction) {
661   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
662 }
663 
// Lane-wise division; only floating-point lanes are supported (NEON has no
// integer vector divide).
void InstructionCodeGeneratorARM64Neon::VisitVecDiv(HVecDiv* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fdiv(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
683 
VisitVecMin(HVecMin * instruction)684 void LocationsBuilderARM64Neon::VisitVecMin(HVecMin* instruction) {
685   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
686 }
687 
// Lane-wise minimum: Umin/Smin by signedness for integral lanes, Fmin for FP.
void InstructionCodeGeneratorARM64Neon::VisitVecMin(HVecMin* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
731 
VisitVecMax(HVecMax * instruction)732 void LocationsBuilderARM64Neon::VisitVecMax(HVecMax* instruction) {
733   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
734 }
735 
// Lane-wise maximum: Umax/Smax by signedness for integral lanes, Fmax for FP.
void InstructionCodeGeneratorARM64Neon::VisitVecMax(HVecMax* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
779 
VisitVecAnd(HVecAnd * instruction)780 void LocationsBuilderARM64Neon::VisitVecAnd(HVecAnd* instruction) {
781   // TODO: Allow constants supported by BIC (vector, immediate).
782   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
783 }
784 
// Emits a bitwise AND of two vectors. AND is lane-agnostic, so a single
// full-width V16B form covers every supported packed type.
void InstructionCodeGeneratorARM64Neon::VisitVecAnd(HVecAnd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ And(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
807 
// HVecAndNot is not implemented for the ARM64 NEON backend; reaching this
// visitor indicates a compiler bug upstream of code generation.
void LocationsBuilderARM64Neon::VisitVecAndNot(HVecAndNot* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}
811 
// Code generation counterpart of the unimplemented HVecAndNot above.
void InstructionCodeGeneratorARM64Neon::VisitVecAndNot(HVecAndNot* instruction) {
  // TODO: Use BIC (vector, register).
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}
816 
VisitVecOr(HVecOr * instruction)817 void LocationsBuilderARM64Neon::VisitVecOr(HVecOr* instruction) {
818   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
819 }
820 
// Emits a bitwise OR of two vectors. Like AND/XOR, ORR is lane-agnostic,
// so the full-width V16B arrangement serves every packed type.
void InstructionCodeGeneratorARM64Neon::VisitVecOr(HVecOr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ Orr(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
843 
VisitVecXor(HVecXor * instruction)844 void LocationsBuilderARM64Neon::VisitVecXor(HVecXor* instruction) {
845   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
846 }
847 
// Emits a bitwise exclusive-OR of two vectors via EOR; lane-agnostic, so a
// single V16B form handles all supported packed types.
void InstructionCodeGeneratorARM64Neon::VisitVecXor(HVecXor* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ Eor(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
870 
871 // Helper to set up locations for vector shift operations.
CreateVecShiftLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)872 static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
873   LocationSummary* locations = new (allocator) LocationSummary(instruction);
874   switch (instruction->GetPackedType()) {
875     case DataType::Type::kUint8:
876     case DataType::Type::kInt8:
877     case DataType::Type::kUint16:
878     case DataType::Type::kInt16:
879     case DataType::Type::kInt32:
880     case DataType::Type::kInt64:
881       locations->SetInAt(0, Location::RequiresFpuRegister());
882       locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
883       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
884       break;
885     default:
886       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
887       UNREACHABLE();
888   }
889 }
890 
VisitVecShl(HVecShl * instruction)891 void LocationsBuilderARM64Neon::VisitVecShl(HVecShl* instruction) {
892   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
893 }
894 
// Emits a per-lane left shift by an immediate: dst = lhs << value.
// The shift amount comes from the constant at input 1 (see
// CreateVecShiftLocations, which requires it to be a constant).
void InstructionCodeGeneratorARM64Neon::VisitVecShl(HVecShl* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:  // left shift is sign-agnostic
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Shl(dst.V16B(), lhs.V16B(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Shl(dst.V8H(), lhs.V8H(), value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Shl(dst.V4S(), lhs.V4S(), value);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Shl(dst.V2D(), lhs.V2D(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
924 
VisitVecShr(HVecShr * instruction)925 void LocationsBuilderARM64Neon::VisitVecShr(HVecShr* instruction) {
926   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
927 }
928 
// Emits a per-lane arithmetic (signed) right shift by an immediate via SSHR.
// NOTE(review): kUint8/kUint16 share the signed-shift path here — presumably
// their high bits are known-zero at this point so SSHR and USHR coincide;
// confirm against HVecShr semantics for unsigned packed types.
void InstructionCodeGeneratorARM64Neon::VisitVecShr(HVecShr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sshr(dst.V16B(), lhs.V16B(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sshr(dst.V8H(), lhs.V8H(), value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Sshr(dst.V4S(), lhs.V4S(), value);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Sshr(dst.V2D(), lhs.V2D(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
958 
VisitVecUShr(HVecUShr * instruction)959 void LocationsBuilderARM64Neon::VisitVecUShr(HVecUShr* instruction) {
960   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
961 }
962 
// Emits a per-lane logical (zero-filling) right shift by an immediate via USHR.
void InstructionCodeGeneratorARM64Neon::VisitVecUShr(HVecUShr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Ushr(dst.V16B(), lhs.V16B(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Ushr(dst.V8H(), lhs.V8H(), value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Ushr(dst.V4S(), lhs.V4S(), value);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Ushr(dst.V2D(), lhs.V2D(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
992 
// Sets up locations for HVecSetScalars (build a vector from scalars).
// A zero input is kept as a constant so codegen can rely on the Movi-zeroed
// destination alone; otherwise the scalar comes in a core register for
// integral types or an FP register for floating-point types.
void LocationsBuilderARM64Neon::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);

  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented

  HInstruction* input = instruction->InputAt(0);
  bool is_zero = IsZeroBitPattern(input);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
                                    : Location::RequiresRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
                                    : Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
1024 
// Builds a vector from a single scalar: zero the whole register, then insert
// the scalar into lane 0 (unless the input is a zero bit pattern, in which
// case the Movi alone suffices).
void InstructionCodeGeneratorARM64Neon::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister dst = VRegisterFrom(locations->Out());

  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented

  // Zero out all other elements first.
  __ Movi(dst.V16B(), 0);

  // Shorthand for any type of zero.
  if (IsZeroBitPattern(instruction->InputAt(0))) {
    return;
  }

  // Set required elements.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));  // lane 0 <- scalar
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
1065 
1066 // Helper to set up locations for vector accumulations.
CreateVecAccumLocations(ArenaAllocator * allocator,HVecOperation * instruction)1067 static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
1068   LocationSummary* locations = new (allocator) LocationSummary(instruction);
1069   switch (instruction->GetPackedType()) {
1070     case DataType::Type::kUint8:
1071     case DataType::Type::kInt8:
1072     case DataType::Type::kUint16:
1073     case DataType::Type::kInt16:
1074     case DataType::Type::kInt32:
1075     case DataType::Type::kInt64:
1076       locations->SetInAt(0, Location::RequiresFpuRegister());
1077       locations->SetInAt(1, Location::RequiresFpuRegister());
1078       locations->SetInAt(2, Location::RequiresFpuRegister());
1079       locations->SetOut(Location::SameAsFirstInput());
1080       break;
1081     default:
1082       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1083       UNREACHABLE();
1084   }
1085 }
1086 
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1087 void LocationsBuilderARM64Neon::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1088   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1089 }
1090 
1091 // Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
1092 // 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
1093 // However vector MultiplyAccumulate instruction is not affected.
// Emits per-lane multiply-accumulate: acc = acc +/- (left * right), choosing
// MLA for kAdd and MLS otherwise. The accumulator doubles as the output
// (SameAsFirstInput in CreateVecAccumLocations).
void InstructionCodeGeneratorARM64Neon::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister acc = VRegisterFrom(locations->InAt(0));
  VRegister left = VRegisterFrom(locations->InAt(1));
  VRegister right = VRegisterFrom(locations->InAt(2));

  DCHECK(locations->InAt(0).Equals(locations->Out()));

  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.V16B(), left.V16B(), right.V16B());
      } else {
        __ Mls(acc.V16B(), left.V16B(), right.V16B());
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.V8H(), left.V8H(), right.V8H());
      } else {
        __ Mls(acc.V8H(), left.V8H(), right.V8H());
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.V4S(), left.V4S(), right.V4S());
      } else {
        __ Mls(acc.V4S(), left.V4S(), right.V4S());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
1134 
// Sets up locations for SAD (sum of absolute differences) accumulation.
// On top of the common accumulator layout, widening combinations need FP
// temporaries for the sign-extension staging done in the codegen below:
// the temp counts here must stay in sync with the GetTemp() uses there.
void LocationsBuilderARM64Neon::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
  // Some conversions require temporary registers.
  LocationSummary* locations = instruction->GetLocations();
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  switch (a->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt64:
          // 8-bit -> 64-bit needs four temps (falls through to add two more).
          locations->AddTemp(Location::RequiresFpuRegister());
          locations->AddTemp(Location::RequiresFpuRegister());
          FALLTHROUGH_INTENDED;
        case DataType::Type::kInt32:
          // 8-bit -> 32-bit needs two temps.
          locations->AddTemp(Location::RequiresFpuRegister());
          locations->AddTemp(Location::RequiresFpuRegister());
          break;
        default:
          break;
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (instruction->GetPackedType() == DataType::Type::kInt64) {
        // 16-bit -> 64-bit needs two temps.
        locations->AddTemp(Location::RequiresFpuRegister());
        locations->AddTemp(Location::RequiresFpuRegister());
      }
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      if (instruction->GetPackedType() == a->GetPackedType()) {
        // Same-width SAD uses one temp for the Sub/Abs intermediate.
        locations->AddTemp(Location::RequiresFpuRegister());
      }
      break;
    default:
      break;
  }
}
1176 
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1177 void InstructionCodeGeneratorARM64Neon::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1178   LocationSummary* locations = instruction->GetLocations();
1179   VRegister acc = VRegisterFrom(locations->InAt(0));
1180   VRegister left = VRegisterFrom(locations->InAt(1));
1181   VRegister right = VRegisterFrom(locations->InAt(2));
1182 
1183   DCHECK(locations->InAt(0).Equals(locations->Out()));
1184 
1185   // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S).
1186   HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
1187   HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
1188   DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
1189             HVecOperation::ToSignedType(b->GetPackedType()));
1190   switch (a->GetPackedType()) {
1191     case DataType::Type::kUint8:
1192     case DataType::Type::kInt8:
1193       DCHECK_EQ(16u, a->GetVectorLength());
1194       switch (instruction->GetPackedType()) {
1195         case DataType::Type::kInt16:
1196           DCHECK_EQ(8u, instruction->GetVectorLength());
1197           __ Sabal(acc.V8H(), left.V8B(), right.V8B());
1198           __ Sabal2(acc.V8H(), left.V16B(), right.V16B());
1199           break;
1200         case DataType::Type::kInt32: {
1201           DCHECK_EQ(4u, instruction->GetVectorLength());
1202           VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
1203           VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
1204           __ Sxtl(tmp1.V8H(), left.V8B());
1205           __ Sxtl(tmp2.V8H(), right.V8B());
1206           __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
1207           __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
1208           __ Sxtl2(tmp1.V8H(), left.V16B());
1209           __ Sxtl2(tmp2.V8H(), right.V16B());
1210           __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
1211           __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
1212           break;
1213         }
1214         case DataType::Type::kInt64: {
1215           DCHECK_EQ(2u, instruction->GetVectorLength());
1216           VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
1217           VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
1218           VRegister tmp3 = VRegisterFrom(locations->GetTemp(2));
1219           VRegister tmp4 = VRegisterFrom(locations->GetTemp(3));
1220           __ Sxtl(tmp1.V8H(), left.V8B());
1221           __ Sxtl(tmp2.V8H(), right.V8B());
1222           __ Sxtl(tmp3.V4S(), tmp1.V4H());
1223           __ Sxtl(tmp4.V4S(), tmp2.V4H());
1224           __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
1225           __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
1226           __ Sxtl2(tmp3.V4S(), tmp1.V8H());
1227           __ Sxtl2(tmp4.V4S(), tmp2.V8H());
1228           __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
1229           __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
1230           __ Sxtl2(tmp1.V8H(), left.V16B());
1231           __ Sxtl2(tmp2.V8H(), right.V16B());
1232           __ Sxtl(tmp3.V4S(), tmp1.V4H());
1233           __ Sxtl(tmp4.V4S(), tmp2.V4H());
1234           __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
1235           __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
1236           __ Sxtl2(tmp3.V4S(), tmp1.V8H());
1237           __ Sxtl2(tmp4.V4S(), tmp2.V8H());
1238           __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
1239           __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
1240           break;
1241         }
1242         default:
1243           LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1244           UNREACHABLE();
1245       }
1246       break;
1247     case DataType::Type::kUint16:
1248     case DataType::Type::kInt16:
1249       DCHECK_EQ(8u, a->GetVectorLength());
1250       switch (instruction->GetPackedType()) {
1251         case DataType::Type::kInt32:
1252           DCHECK_EQ(4u, instruction->GetVectorLength());
1253           __ Sabal(acc.V4S(), left.V4H(), right.V4H());
1254           __ Sabal2(acc.V4S(), left.V8H(), right.V8H());
1255           break;
1256         case DataType::Type::kInt64: {
1257           DCHECK_EQ(2u, instruction->GetVectorLength());
1258           VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
1259           VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
1260           __ Sxtl(tmp1.V4S(), left.V4H());
1261           __ Sxtl(tmp2.V4S(), right.V4H());
1262           __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
1263           __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
1264           __ Sxtl2(tmp1.V4S(), left.V8H());
1265           __ Sxtl2(tmp2.V4S(), right.V8H());
1266           __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
1267           __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
1268           break;
1269         }
1270         default:
1271           LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1272           UNREACHABLE();
1273       }
1274       break;
1275     case DataType::Type::kInt32:
1276       DCHECK_EQ(4u, a->GetVectorLength());
1277       switch (instruction->GetPackedType()) {
1278         case DataType::Type::kInt32: {
1279           DCHECK_EQ(4u, instruction->GetVectorLength());
1280           VRegister tmp = VRegisterFrom(locations->GetTemp(0));
1281           __ Sub(tmp.V4S(), left.V4S(), right.V4S());
1282           __ Abs(tmp.V4S(), tmp.V4S());
1283           __ Add(acc.V4S(), acc.V4S(), tmp.V4S());
1284           break;
1285         }
1286         case DataType::Type::kInt64:
1287           DCHECK_EQ(2u, instruction->GetVectorLength());
1288           __ Sabal(acc.V2D(), left.V2S(), right.V2S());
1289           __ Sabal2(acc.V2D(), left.V4S(), right.V4S());
1290           break;
1291         default:
1292           LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1293           UNREACHABLE();
1294       }
1295       break;
1296     case DataType::Type::kInt64:
1297       DCHECK_EQ(2u, a->GetVectorLength());
1298       switch (instruction->GetPackedType()) {
1299         case DataType::Type::kInt64: {
1300           DCHECK_EQ(2u, instruction->GetVectorLength());
1301           VRegister tmp = VRegisterFrom(locations->GetTemp(0));
1302           __ Sub(tmp.V2D(), left.V2D(), right.V2D());
1303           __ Abs(tmp.V2D(), tmp.V2D());
1304           __ Add(acc.V2D(), acc.V2D(), tmp.V2D());
1305           break;
1306         }
1307         default:
1308           LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1309           UNREACHABLE();
1310       }
1311       break;
1312     default:
1313       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1314   }
1315 }
1316 
// Sets up locations for a dot-product accumulation (only Int32 results are
// supported). Accumulator, lhs, and rhs all live in FP/SIMD registers with
// the output aliased to the accumulator.
void LocationsBuilderARM64Neon::VisitVecDotProd(HVecDotProd* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  DCHECK(instruction->GetPackedType() == DataType::Type::kInt32);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetInAt(2, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());

  // For Int8 and Uint8 general case we need a temp register.
  if ((DataType::Size(instruction->InputAt(1)->AsVecOperation()->GetPackedType()) == 1) &&
      !ShouldEmitDotProductInstructions(codegen_)) {
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}
1331 
// Emits a dot-product accumulation into 32-bit lanes. For byte inputs it
// prefers the dedicated UDOT/SDOT instructions when the CPU supports them
// (ShouldEmitDotProductInstructions), otherwise it expands to widening
// multiplies (UMULL/SMULL) plus widening adds through a temp register.
// 16-bit inputs always use the UMLAL/SMLAL widening multiply-accumulate.
void InstructionCodeGeneratorARM64Neon::VisitVecDotProd(HVecDotProd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  VRegister acc = VRegisterFrom(locations->InAt(0));
  VRegister left = VRegisterFrom(locations->InAt(1));
  VRegister right = VRegisterFrom(locations->InAt(2));
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32);
  DCHECK_EQ(4u, instruction->GetVectorLength());

  size_t inputs_data_size = DataType::Size(a->GetPackedType());
  switch (inputs_data_size) {
    case 1u: {  // byte inputs
      DCHECK_EQ(16u, a->GetVectorLength());
      if (instruction->IsZeroExtending()) {
        if (ShouldEmitDotProductInstructions(codegen_)) {
          __ Udot(acc.V4S(), left.V16B(), right.V16B());
        } else {
          // Expand: widen-multiply each half, then widen-add into acc.
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          __ Umull(tmp.V8H(), left.V8B(), right.V8B());
          __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());

          __ Umull2(tmp.V8H(), left.V16B(), right.V16B());
          __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
        }
      } else {
        if (ShouldEmitDotProductInstructions(codegen_)) {
          __ Sdot(acc.V4S(), left.V16B(), right.V16B());
        } else {
          // Signed variant of the expansion above.
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          __ Smull(tmp.V8H(), left.V8B(), right.V8B());
          __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());

          __ Smull2(tmp.V8H(), left.V16B(), right.V16B());
          __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
        }
      }
      break;
    }
    case 2u:  // half-word inputs
      DCHECK_EQ(8u, a->GetVectorLength());
      if (instruction->IsZeroExtending()) {
        __ Umlal(acc.V4S(), left.V4H(), right.V4H());
        __ Umlal2(acc.V4S(), left.V8H(), right.V8H());
      } else {
        __ Smlal(acc.V4S(), left.V4H(), right.V4H());
        __ Smlal2(acc.V4S(), left.V8H(), right.V8H());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size;
  }
}
1392 
1393 // Helper to set up locations for vector memory operations.
CreateVecMemLocations(ArenaAllocator * allocator,HVecMemoryOperation * instruction,bool is_load)1394 static void CreateVecMemLocations(ArenaAllocator* allocator,
1395                                   HVecMemoryOperation* instruction,
1396                                   bool is_load) {
1397   LocationSummary* locations = new (allocator) LocationSummary(instruction);
1398   switch (instruction->GetPackedType()) {
1399     case DataType::Type::kBool:
1400     case DataType::Type::kUint8:
1401     case DataType::Type::kInt8:
1402     case DataType::Type::kUint16:
1403     case DataType::Type::kInt16:
1404     case DataType::Type::kInt32:
1405     case DataType::Type::kInt64:
1406     case DataType::Type::kFloat32:
1407     case DataType::Type::kFloat64:
1408       locations->SetInAt(0, Location::RequiresRegister());
1409       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
1410       if (is_load) {
1411         locations->SetOut(Location::RequiresFpuRegister());
1412       } else {
1413         locations->SetInAt(2, Location::RequiresFpuRegister());
1414       }
1415       break;
1416     default:
1417       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1418       UNREACHABLE();
1419   }
1420 }
1421 
VisitVecLoad(HVecLoad * instruction)1422 void LocationsBuilderARM64Neon::VisitVecLoad(HVecLoad* instruction) {
1423   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
1424 }
1425 
// Emits NEON code for a vector load. Uint16/Int16 loads that originate from
// String.charAt() get special handling because the string payload may be
// stored in compressed (8-bit per char) form.
void InstructionCodeGeneratorARM64Neon::VisitVecLoad(HVecLoad* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  VRegister reg = VRegisterFrom(locations->Out());
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;  // May be acquired by VecNEONAddress; released below if it was.

  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      // Special handling of compressed/uncompressed string load.
      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
        vixl::aarch64::Label uncompressed_load, done;
        // Test compression bit.
        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                      "Expecting 0=compressed, 1=uncompressed");
        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
        Register length = temps.AcquireW();
        __ Ldr(length, HeapOperand(InputRegisterAt(instruction, 0), count_offset));
        // The low bit of the count field holds the compression flag.
        __ Tbnz(length.W(), 0, &uncompressed_load);
        temps.Release(length);  // no longer needed
        // Zero extend 8 compressed bytes into 8 chars.
        // Element size 1 (bytes) is passed so the index is not scaled to chars.
        __ Ldr(DRegisterFrom(locations->Out()).V8B(),
               VecNEONAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
        __ Uxtl(reg.V8H(), reg.V8B());
        __ B(&done);
        if (scratch.IsValid()) {
          temps.Release(scratch);  // if used, no longer needed
        }
        // Load 8 direct uncompressed chars.
        __ Bind(&uncompressed_load);
        __ Ldr(reg,
               VecNEONAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
        __ Bind(&done);
        return;
      }
      FALLTHROUGH_INTENDED;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      // Plain full-register load for all other packed types.
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ Ldr(reg,
             VecNEONAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
1481 
VisitVecStore(HVecStore * instruction)1482 void LocationsBuilderARM64Neon::VisitVecStore(HVecStore* instruction) {
1483   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
1484 }
1485 
// Emits NEON code for a vector store: a single full-register STR for every
// supported packed type. Stores never target compressed strings, so no
// StringCharAt handling is needed here.
void InstructionCodeGeneratorARM64Neon::VisitVecStore(HVecStore* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  VRegister reg = VRegisterFrom(locations->InAt(2));  // Input 2 is the value to store.
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;  // May be acquired by VecNEONAddress if address computation needs it.

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ Str(reg,
             VecNEONAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
1513 
VisitVecPredSetAll(HVecPredSetAll * instruction)1514 void LocationsBuilderARM64Neon::VisitVecPredSetAll(HVecPredSetAll* instruction) {
1515   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
1516   DCHECK(instruction->InputAt(0)->IsIntConstant());
1517   locations->SetInAt(0, Location::NoLocation());
1518   locations->SetOut(Location::NoLocation());
1519 }
1520 
// Intentionally empty: predicate set-all emits no instructions in the NEON
// backend (its locations are all NoLocation).
void InstructionCodeGeneratorARM64Neon::VisitVecPredSetAll(HVecPredSetAll*) {
}
1523 
// Predicated while is not supported by the NEON backend; reaching here
// indicates a compiler bug.
void LocationsBuilderARM64Neon::VisitVecPredWhile(HVecPredWhile* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1528 
// Predicated while is not supported by the NEON backend; reaching here
// indicates a compiler bug.
void InstructionCodeGeneratorARM64Neon::VisitVecPredWhile(HVecPredWhile* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1533 
// Predicate-to-boolean is not supported by the NEON backend; reaching here
// indicates a compiler bug.
void LocationsBuilderARM64Neon::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1538 
// Predicate-to-boolean is not supported by the NEON backend; reaching here
// indicates a compiler bug.
void InstructionCodeGeneratorARM64Neon::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1543 
// Vector condition is not supported by the NEON backend; reaching here
// indicates a compiler bug.
void LocationsBuilderARM64Neon::VisitVecCondition(HVecCondition* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1548 
// Vector condition is not supported by the NEON backend; reaching here
// indicates a compiler bug.
void InstructionCodeGeneratorARM64Neon::VisitVecCondition(HVecCondition* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1553 
// Predicate-not is not supported by the NEON backend; reaching here indicates
// a compiler bug.
void LocationsBuilderARM64Neon::VisitVecPredNot(HVecPredNot* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1558 
// Predicate-not is not supported by the NEON backend; reaching here indicates
// a compiler bug.
void InstructionCodeGeneratorARM64Neon::VisitVecPredNot(HVecPredNot* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1563 
AllocateSIMDScratchLocation(vixl::aarch64::UseScratchRegisterScope * scope)1564 Location InstructionCodeGeneratorARM64Neon::AllocateSIMDScratchLocation(
1565     vixl::aarch64::UseScratchRegisterScope* scope) {
1566   DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
1567   return LocationFrom(scope->AcquireVRegisterOfSize(kQRegSize));
1568 }
1569 
FreeSIMDScratchLocation(Location loc,vixl::aarch64::UseScratchRegisterScope * scope)1570 void InstructionCodeGeneratorARM64Neon::FreeSIMDScratchLocation(Location loc,
1571     vixl::aarch64::UseScratchRegisterScope* scope) {
1572   DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
1573   scope->Release(QRegisterFrom(loc));
1574 }
1575 
LoadSIMDRegFromStack(Location destination,Location source)1576 void InstructionCodeGeneratorARM64Neon::LoadSIMDRegFromStack(Location destination,
1577                                                              Location source) {
1578   DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
1579   __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
1580 }
1581 
MoveSIMDRegToSIMDReg(Location destination,Location source)1582 void InstructionCodeGeneratorARM64Neon::MoveSIMDRegToSIMDReg(Location destination,
1583                                                              Location source) {
1584   DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
1585   __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
1586 }
1587 
// Moves a SIMD value into a SIMD stack slot, from either an FP register or
// another SIMD stack slot.
void InstructionCodeGeneratorARM64Neon::MoveToSIMDStackSlot(Location destination,
                                                            Location source) {
  DCHECK(destination.IsSIMDStackSlot());
  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);

  if (source.IsFpuRegister()) {
    // Register -> stack: a single Q-register store.
    __ Str(QRegisterFrom(source), StackOperandFrom(destination));
  } else {
    // Stack -> stack copy.
    DCHECK(source.IsSIMDStackSlot());
    UseScratchRegisterScope temps(GetVIXLAssembler());
    if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
      // No scratch vector register available: copy the 16 bytes as two
      // 8-byte chunks through an X scratch register.
      Register temp = temps.AcquireX();
      __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
      __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
      __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
      __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
    } else {
      // Copy via a scratch Q register in one load/store pair.
      VRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
      __ Ldr(temp, StackOperandFrom(source));
      __ Str(temp, StackOperandFrom(destination));
    }
  }
}
1611 
// Saves (is_save == true) or restores the live core and FP registers recorded
// in `locations` at `spill_offset` from the stack pointer. Core registers are
// placed first, FP registers immediately after.
template <bool is_save>
void SaveRestoreLiveRegistersHelperNeonImpl(CodeGeneratorARM64* codegen,
                                            LocationSummary* locations,
                                            int64_t spill_offset) {
  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
  DCHECK(helpers::ArtVixlRegCodeCoherentForRegSet(core_spills,
                                                  codegen->GetNumberOfCoreRegisters(),
                                                  fp_spills,
                                                  codegen->GetNumberOfFloatingPointRegisters()));

  CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
  // FP registers are spilled at the slow-path width (full SIMD width if any
  // SIMD value is live, D-register width otherwise).
  const unsigned v_reg_size_in_bits = codegen->GetSlowPathFPWidth() * 8;
  DCHECK_LE(codegen->GetSIMDRegisterWidth(), kQRegSizeInBytes);
  CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size_in_bits, fp_spills);

  MacroAssembler* masm = codegen->GetVIXLAssembler();
  UseScratchRegisterScope temps(masm);

  Register base = masm->StackPointer();
  int64_t core_spill_size = core_list.GetTotalSizeInBytes();
  int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
  int64_t reg_size = kXRegSizeInBytes;
  // Largest offset a load/store pair instruction would need to encode.
  int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
  uint32_t ls_access_size = WhichPowerOf2(reg_size);
  if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
      !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
    // If the offset does not fit in the instruction's immediate field, use an alternate register
    // to compute the base address(float point registers spill base address).
    Register new_base = temps.AcquireSameSizeAs(base);
    masm->Add(new_base, base, Operand(spill_offset + core_spill_size));
    base = new_base;
    // Core registers now sit at negative offsets from the rebased address;
    // FP registers start at offset 0 (= spill_offset + core_spill_size below).
    spill_offset = -core_spill_size;
    int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
    DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
    DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
  }

  if (is_save) {
    masm->StoreCPURegList(core_list, MemOperand(base, spill_offset));
    masm->StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
  } else {
    masm->LoadCPURegList(core_list, MemOperand(base, spill_offset));
    masm->LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
  }
}
1659 
// Saves the slow-path live registers; see SaveRestoreLiveRegistersHelperNeonImpl.
void InstructionCodeGeneratorARM64Neon::SaveLiveRegistersHelper(LocationSummary* locations,
                                                                int64_t spill_offset) {
  SaveRestoreLiveRegistersHelperNeonImpl</* is_save= */ true>(codegen_, locations, spill_offset);
}
1664 
// Restores the slow-path live registers; see SaveRestoreLiveRegistersHelperNeonImpl.
void InstructionCodeGeneratorARM64Neon::RestoreLiveRegistersHelper(LocationSummary* locations,
                                                                   int64_t spill_offset) {
  SaveRestoreLiveRegistersHelperNeonImpl</* is_save= */ false>(codegen_, locations, spill_offset);
}
1669 
1670 #undef __
1671 
1672 }  // namespace arm64
1673 }  // namespace art
1674