1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_arm64.h"
18 
19 #include "arch/arm64/instruction_set_features_arm64.h"
20 #include "mirror/array-inl.h"
21 #include "mirror/string.h"
22 
23 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
24 
25 namespace art {
26 namespace arm64 {
27 
28 using helpers::ARM64EncodableConstantOrRegister;
29 using helpers::Arm64CanEncodeConstantAsImmediate;
30 using helpers::DRegisterFrom;
31 using helpers::HeapOperand;
32 using helpers::InputRegisterAt;
33 using helpers::Int64FromLocation;
34 using helpers::OutputRegister;
35 using helpers::VRegisterFrom;
36 using helpers::WRegisterFrom;
37 using helpers::XRegisterFrom;
38 
39 #define __ GetVIXLAssembler()->
40 
41 // Build-time switch for Armv8.4-a dot product instructions.
42 // TODO: Enable dot product when there is a device to test it on.
43 static constexpr bool kArm64EmitDotProdInstructions = false;
44 
45 // Returns whether dot product instructions should be emitted.
ShouldEmitDotProductInstructions(const CodeGeneratorARM64 * codegen_)46 static bool ShouldEmitDotProductInstructions(const CodeGeneratorARM64* codegen_) {
47   return kArm64EmitDotProdInstructions && codegen_->GetInstructionSetFeatures().HasDotProd();
48 }
49 
VisitVecReplicateScalar(HVecReplicateScalar * instruction)50 void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
51   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
52   HInstruction* input = instruction->InputAt(0);
53   switch (instruction->GetPackedType()) {
54     case DataType::Type::kBool:
55     case DataType::Type::kUint8:
56     case DataType::Type::kInt8:
57     case DataType::Type::kUint16:
58     case DataType::Type::kInt16:
59     case DataType::Type::kInt32:
60     case DataType::Type::kInt64:
61       locations->SetInAt(0, ARM64EncodableConstantOrRegister(input, instruction));
62       locations->SetOut(Location::RequiresFpuRegister());
63       break;
64     case DataType::Type::kFloat32:
65     case DataType::Type::kFloat64:
66       if (input->IsConstant() &&
67           Arm64CanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
68         locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
69         locations->SetOut(Location::RequiresFpuRegister());
70       } else {
71         locations->SetInAt(0, Location::RequiresFpuRegister());
72         locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
73       }
74       break;
75     default:
76       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
77       UNREACHABLE();
78   }
79 }
80 
VisitVecReplicateScalar(HVecReplicateScalar * instruction)81 void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
82   LocationSummary* locations = instruction->GetLocations();
83   Location src_loc = locations->InAt(0);
84   VRegister dst = VRegisterFrom(locations->Out());
85   switch (instruction->GetPackedType()) {
86     case DataType::Type::kBool:
87     case DataType::Type::kUint8:
88     case DataType::Type::kInt8:
89       DCHECK_EQ(16u, instruction->GetVectorLength());
90       if (src_loc.IsConstant()) {
91         __ Movi(dst.V16B(), Int64FromLocation(src_loc));
92       } else {
93         __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
94       }
95       break;
96     case DataType::Type::kUint16:
97     case DataType::Type::kInt16:
98       DCHECK_EQ(8u, instruction->GetVectorLength());
99       if (src_loc.IsConstant()) {
100         __ Movi(dst.V8H(), Int64FromLocation(src_loc));
101       } else {
102         __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
103       }
104       break;
105     case DataType::Type::kInt32:
106       DCHECK_EQ(4u, instruction->GetVectorLength());
107       if (src_loc.IsConstant()) {
108         __ Movi(dst.V4S(), Int64FromLocation(src_loc));
109       } else {
110         __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
111       }
112       break;
113     case DataType::Type::kInt64:
114       DCHECK_EQ(2u, instruction->GetVectorLength());
115       if (src_loc.IsConstant()) {
116         __ Movi(dst.V2D(), Int64FromLocation(src_loc));
117       } else {
118         __ Dup(dst.V2D(), XRegisterFrom(src_loc));
119       }
120       break;
121     case DataType::Type::kFloat32:
122       DCHECK_EQ(4u, instruction->GetVectorLength());
123       if (src_loc.IsConstant()) {
124         __ Fmov(dst.V4S(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
125       } else {
126         __ Dup(dst.V4S(), VRegisterFrom(src_loc).V4S(), 0);
127       }
128       break;
129     case DataType::Type::kFloat64:
130       DCHECK_EQ(2u, instruction->GetVectorLength());
131       if (src_loc.IsConstant()) {
132         __ Fmov(dst.V2D(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
133       } else {
134         __ Dup(dst.V2D(), VRegisterFrom(src_loc).V2D(), 0);
135       }
136       break;
137     default:
138       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
139       UNREACHABLE();
140   }
141 }
142 
VisitVecExtractScalar(HVecExtractScalar * instruction)143 void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
144   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
145   switch (instruction->GetPackedType()) {
146     case DataType::Type::kBool:
147     case DataType::Type::kUint8:
148     case DataType::Type::kInt8:
149     case DataType::Type::kUint16:
150     case DataType::Type::kInt16:
151     case DataType::Type::kInt32:
152     case DataType::Type::kInt64:
153       locations->SetInAt(0, Location::RequiresFpuRegister());
154       locations->SetOut(Location::RequiresRegister());
155       break;
156     case DataType::Type::kFloat32:
157     case DataType::Type::kFloat64:
158       locations->SetInAt(0, Location::RequiresFpuRegister());
159       locations->SetOut(Location::SameAsFirstInput());
160       break;
161     default:
162       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
163       UNREACHABLE();
164   }
165 }
166 
VisitVecExtractScalar(HVecExtractScalar * instruction)167 void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
168   LocationSummary* locations = instruction->GetLocations();
169   VRegister src = VRegisterFrom(locations->InAt(0));
170   switch (instruction->GetPackedType()) {
171     case DataType::Type::kInt32:
172       DCHECK_EQ(4u, instruction->GetVectorLength());
173       __ Umov(OutputRegister(instruction), src.V4S(), 0);
174       break;
175     case DataType::Type::kInt64:
176       DCHECK_EQ(2u, instruction->GetVectorLength());
177       __ Umov(OutputRegister(instruction), src.V2D(), 0);
178       break;
179     case DataType::Type::kFloat32:
180     case DataType::Type::kFloat64:
181       DCHECK_LE(2u, instruction->GetVectorLength());
182       DCHECK_LE(instruction->GetVectorLength(), 4u);
183       DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
184       break;
185     default:
186       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
187       UNREACHABLE();
188   }
189 }
190 
191 // Helper to set up locations for vector unary operations.
CreateVecUnOpLocations(ArenaAllocator * allocator,HVecUnaryOperation * instruction)192 static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
193   LocationSummary* locations = new (allocator) LocationSummary(instruction);
194   switch (instruction->GetPackedType()) {
195     case DataType::Type::kBool:
196       locations->SetInAt(0, Location::RequiresFpuRegister());
197       locations->SetOut(Location::RequiresFpuRegister(),
198                         instruction->IsVecNot() ? Location::kOutputOverlap
199                                                 : Location::kNoOutputOverlap);
200       break;
201     case DataType::Type::kUint8:
202     case DataType::Type::kInt8:
203     case DataType::Type::kUint16:
204     case DataType::Type::kInt16:
205     case DataType::Type::kInt32:
206     case DataType::Type::kInt64:
207     case DataType::Type::kFloat32:
208     case DataType::Type::kFloat64:
209       locations->SetInAt(0, Location::RequiresFpuRegister());
210       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
211       break;
212     default:
213       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
214       UNREACHABLE();
215   }
216 }
217 
VisitVecReduce(HVecReduce * instruction)218 void LocationsBuilderARM64::VisitVecReduce(HVecReduce* instruction) {
219   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
220 }
221 
VisitVecReduce(HVecReduce * instruction)222 void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) {
223   LocationSummary* locations = instruction->GetLocations();
224   VRegister src = VRegisterFrom(locations->InAt(0));
225   VRegister dst = DRegisterFrom(locations->Out());
226   switch (instruction->GetPackedType()) {
227     case DataType::Type::kInt32:
228       DCHECK_EQ(4u, instruction->GetVectorLength());
229       switch (instruction->GetReductionKind()) {
230         case HVecReduce::kSum:
231           __ Addv(dst.S(), src.V4S());
232           break;
233         case HVecReduce::kMin:
234           __ Sminv(dst.S(), src.V4S());
235           break;
236         case HVecReduce::kMax:
237           __ Smaxv(dst.S(), src.V4S());
238           break;
239       }
240       break;
241     case DataType::Type::kInt64:
242       DCHECK_EQ(2u, instruction->GetVectorLength());
243       switch (instruction->GetReductionKind()) {
244         case HVecReduce::kSum:
245           __ Addp(dst.D(), src.V2D());
246           break;
247         default:
248           LOG(FATAL) << "Unsupported SIMD min/max";
249           UNREACHABLE();
250       }
251       break;
252     default:
253       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
254       UNREACHABLE();
255   }
256 }
257 
VisitVecCnv(HVecCnv * instruction)258 void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) {
259   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
260 }
261 
VisitVecCnv(HVecCnv * instruction)262 void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) {
263   LocationSummary* locations = instruction->GetLocations();
264   VRegister src = VRegisterFrom(locations->InAt(0));
265   VRegister dst = VRegisterFrom(locations->Out());
266   DataType::Type from = instruction->GetInputType();
267   DataType::Type to = instruction->GetResultType();
268   if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
269     DCHECK_EQ(4u, instruction->GetVectorLength());
270     __ Scvtf(dst.V4S(), src.V4S());
271   } else {
272     LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
273   }
274 }
275 
VisitVecNeg(HVecNeg * instruction)276 void LocationsBuilderARM64::VisitVecNeg(HVecNeg* instruction) {
277   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
278 }
279 
VisitVecNeg(HVecNeg * instruction)280 void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) {
281   LocationSummary* locations = instruction->GetLocations();
282   VRegister src = VRegisterFrom(locations->InAt(0));
283   VRegister dst = VRegisterFrom(locations->Out());
284   switch (instruction->GetPackedType()) {
285     case DataType::Type::kUint8:
286     case DataType::Type::kInt8:
287       DCHECK_EQ(16u, instruction->GetVectorLength());
288       __ Neg(dst.V16B(), src.V16B());
289       break;
290     case DataType::Type::kUint16:
291     case DataType::Type::kInt16:
292       DCHECK_EQ(8u, instruction->GetVectorLength());
293       __ Neg(dst.V8H(), src.V8H());
294       break;
295     case DataType::Type::kInt32:
296       DCHECK_EQ(4u, instruction->GetVectorLength());
297       __ Neg(dst.V4S(), src.V4S());
298       break;
299     case DataType::Type::kInt64:
300       DCHECK_EQ(2u, instruction->GetVectorLength());
301       __ Neg(dst.V2D(), src.V2D());
302       break;
303     case DataType::Type::kFloat32:
304       DCHECK_EQ(4u, instruction->GetVectorLength());
305       __ Fneg(dst.V4S(), src.V4S());
306       break;
307     case DataType::Type::kFloat64:
308       DCHECK_EQ(2u, instruction->GetVectorLength());
309       __ Fneg(dst.V2D(), src.V2D());
310       break;
311     default:
312       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
313       UNREACHABLE();
314   }
315 }
316 
VisitVecAbs(HVecAbs * instruction)317 void LocationsBuilderARM64::VisitVecAbs(HVecAbs* instruction) {
318   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
319 }
320 
VisitVecAbs(HVecAbs * instruction)321 void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) {
322   LocationSummary* locations = instruction->GetLocations();
323   VRegister src = VRegisterFrom(locations->InAt(0));
324   VRegister dst = VRegisterFrom(locations->Out());
325   switch (instruction->GetPackedType()) {
326     case DataType::Type::kInt8:
327       DCHECK_EQ(16u, instruction->GetVectorLength());
328       __ Abs(dst.V16B(), src.V16B());
329       break;
330     case DataType::Type::kInt16:
331       DCHECK_EQ(8u, instruction->GetVectorLength());
332       __ Abs(dst.V8H(), src.V8H());
333       break;
334     case DataType::Type::kInt32:
335       DCHECK_EQ(4u, instruction->GetVectorLength());
336       __ Abs(dst.V4S(), src.V4S());
337       break;
338     case DataType::Type::kInt64:
339       DCHECK_EQ(2u, instruction->GetVectorLength());
340       __ Abs(dst.V2D(), src.V2D());
341       break;
342     case DataType::Type::kFloat32:
343       DCHECK_EQ(4u, instruction->GetVectorLength());
344       __ Fabs(dst.V4S(), src.V4S());
345       break;
346     case DataType::Type::kFloat64:
347       DCHECK_EQ(2u, instruction->GetVectorLength());
348       __ Fabs(dst.V2D(), src.V2D());
349       break;
350     default:
351       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
352       UNREACHABLE();
353   }
354 }
355 
VisitVecNot(HVecNot * instruction)356 void LocationsBuilderARM64::VisitVecNot(HVecNot* instruction) {
357   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
358 }
359 
VisitVecNot(HVecNot * instruction)360 void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) {
361   LocationSummary* locations = instruction->GetLocations();
362   VRegister src = VRegisterFrom(locations->InAt(0));
363   VRegister dst = VRegisterFrom(locations->Out());
364   switch (instruction->GetPackedType()) {
365     case DataType::Type::kBool:  // special case boolean-not
366       DCHECK_EQ(16u, instruction->GetVectorLength());
367       __ Movi(dst.V16B(), 1);
368       __ Eor(dst.V16B(), dst.V16B(), src.V16B());
369       break;
370     case DataType::Type::kUint8:
371     case DataType::Type::kInt8:
372     case DataType::Type::kUint16:
373     case DataType::Type::kInt16:
374     case DataType::Type::kInt32:
375     case DataType::Type::kInt64:
376       __ Not(dst.V16B(), src.V16B());  // lanes do not matter
377       break;
378     default:
379       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
380       UNREACHABLE();
381   }
382 }
383 
384 // Helper to set up locations for vector binary operations.
CreateVecBinOpLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)385 static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
386   LocationSummary* locations = new (allocator) LocationSummary(instruction);
387   switch (instruction->GetPackedType()) {
388     case DataType::Type::kBool:
389     case DataType::Type::kUint8:
390     case DataType::Type::kInt8:
391     case DataType::Type::kUint16:
392     case DataType::Type::kInt16:
393     case DataType::Type::kInt32:
394     case DataType::Type::kInt64:
395     case DataType::Type::kFloat32:
396     case DataType::Type::kFloat64:
397       locations->SetInAt(0, Location::RequiresFpuRegister());
398       locations->SetInAt(1, Location::RequiresFpuRegister());
399       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
400       break;
401     default:
402       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
403       UNREACHABLE();
404   }
405 }
406 
VisitVecAdd(HVecAdd * instruction)407 void LocationsBuilderARM64::VisitVecAdd(HVecAdd* instruction) {
408   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
409 }
410 
VisitVecAdd(HVecAdd * instruction)411 void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) {
412   LocationSummary* locations = instruction->GetLocations();
413   VRegister lhs = VRegisterFrom(locations->InAt(0));
414   VRegister rhs = VRegisterFrom(locations->InAt(1));
415   VRegister dst = VRegisterFrom(locations->Out());
416   switch (instruction->GetPackedType()) {
417     case DataType::Type::kUint8:
418     case DataType::Type::kInt8:
419       DCHECK_EQ(16u, instruction->GetVectorLength());
420       __ Add(dst.V16B(), lhs.V16B(), rhs.V16B());
421       break;
422     case DataType::Type::kUint16:
423     case DataType::Type::kInt16:
424       DCHECK_EQ(8u, instruction->GetVectorLength());
425       __ Add(dst.V8H(), lhs.V8H(), rhs.V8H());
426       break;
427     case DataType::Type::kInt32:
428       DCHECK_EQ(4u, instruction->GetVectorLength());
429       __ Add(dst.V4S(), lhs.V4S(), rhs.V4S());
430       break;
431     case DataType::Type::kInt64:
432       DCHECK_EQ(2u, instruction->GetVectorLength());
433       __ Add(dst.V2D(), lhs.V2D(), rhs.V2D());
434       break;
435     case DataType::Type::kFloat32:
436       DCHECK_EQ(4u, instruction->GetVectorLength());
437       __ Fadd(dst.V4S(), lhs.V4S(), rhs.V4S());
438       break;
439     case DataType::Type::kFloat64:
440       DCHECK_EQ(2u, instruction->GetVectorLength());
441       __ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D());
442       break;
443     default:
444       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
445       UNREACHABLE();
446   }
447 }
448 
VisitVecSaturationAdd(HVecSaturationAdd * instruction)449 void LocationsBuilderARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
450   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
451 }
452 
VisitVecSaturationAdd(HVecSaturationAdd * instruction)453 void InstructionCodeGeneratorARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
454   LocationSummary* locations = instruction->GetLocations();
455   VRegister lhs = VRegisterFrom(locations->InAt(0));
456   VRegister rhs = VRegisterFrom(locations->InAt(1));
457   VRegister dst = VRegisterFrom(locations->Out());
458   switch (instruction->GetPackedType()) {
459     case DataType::Type::kUint8:
460       DCHECK_EQ(16u, instruction->GetVectorLength());
461       __ Uqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
462       break;
463     case DataType::Type::kInt8:
464       DCHECK_EQ(16u, instruction->GetVectorLength());
465       __ Sqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
466       break;
467     case DataType::Type::kUint16:
468       DCHECK_EQ(8u, instruction->GetVectorLength());
469       __ Uqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
470       break;
471     case DataType::Type::kInt16:
472       DCHECK_EQ(8u, instruction->GetVectorLength());
473       __ Sqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
474       break;
475     default:
476       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
477       UNREACHABLE();
478   }
479 }
480 
VisitVecHalvingAdd(HVecHalvingAdd * instruction)481 void LocationsBuilderARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
482   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
483 }
484 
VisitVecHalvingAdd(HVecHalvingAdd * instruction)485 void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
486   LocationSummary* locations = instruction->GetLocations();
487   VRegister lhs = VRegisterFrom(locations->InAt(0));
488   VRegister rhs = VRegisterFrom(locations->InAt(1));
489   VRegister dst = VRegisterFrom(locations->Out());
490   switch (instruction->GetPackedType()) {
491     case DataType::Type::kUint8:
492       DCHECK_EQ(16u, instruction->GetVectorLength());
493       instruction->IsRounded()
494           ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
495           : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B());
496       break;
497     case DataType::Type::kInt8:
498       DCHECK_EQ(16u, instruction->GetVectorLength());
499       instruction->IsRounded()
500           ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
501           : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B());
502       break;
503     case DataType::Type::kUint16:
504       DCHECK_EQ(8u, instruction->GetVectorLength());
505       instruction->IsRounded()
506           ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
507           : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H());
508       break;
509     case DataType::Type::kInt16:
510       DCHECK_EQ(8u, instruction->GetVectorLength());
511       instruction->IsRounded()
512           ? __ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
513           : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
514       break;
515     default:
516       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
517       UNREACHABLE();
518   }
519 }
520 
VisitVecSub(HVecSub * instruction)521 void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) {
522   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
523 }
524 
VisitVecSub(HVecSub * instruction)525 void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) {
526   LocationSummary* locations = instruction->GetLocations();
527   VRegister lhs = VRegisterFrom(locations->InAt(0));
528   VRegister rhs = VRegisterFrom(locations->InAt(1));
529   VRegister dst = VRegisterFrom(locations->Out());
530   switch (instruction->GetPackedType()) {
531     case DataType::Type::kUint8:
532     case DataType::Type::kInt8:
533       DCHECK_EQ(16u, instruction->GetVectorLength());
534       __ Sub(dst.V16B(), lhs.V16B(), rhs.V16B());
535       break;
536     case DataType::Type::kUint16:
537     case DataType::Type::kInt16:
538       DCHECK_EQ(8u, instruction->GetVectorLength());
539       __ Sub(dst.V8H(), lhs.V8H(), rhs.V8H());
540       break;
541     case DataType::Type::kInt32:
542       DCHECK_EQ(4u, instruction->GetVectorLength());
543       __ Sub(dst.V4S(), lhs.V4S(), rhs.V4S());
544       break;
545     case DataType::Type::kInt64:
546       DCHECK_EQ(2u, instruction->GetVectorLength());
547       __ Sub(dst.V2D(), lhs.V2D(), rhs.V2D());
548       break;
549     case DataType::Type::kFloat32:
550       DCHECK_EQ(4u, instruction->GetVectorLength());
551       __ Fsub(dst.V4S(), lhs.V4S(), rhs.V4S());
552       break;
553     case DataType::Type::kFloat64:
554       DCHECK_EQ(2u, instruction->GetVectorLength());
555       __ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D());
556       break;
557     default:
558       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
559       UNREACHABLE();
560   }
561 }
562 
VisitVecSaturationSub(HVecSaturationSub * instruction)563 void LocationsBuilderARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
564   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
565 }
566 
VisitVecSaturationSub(HVecSaturationSub * instruction)567 void InstructionCodeGeneratorARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
568   LocationSummary* locations = instruction->GetLocations();
569   VRegister lhs = VRegisterFrom(locations->InAt(0));
570   VRegister rhs = VRegisterFrom(locations->InAt(1));
571   VRegister dst = VRegisterFrom(locations->Out());
572   switch (instruction->GetPackedType()) {
573     case DataType::Type::kUint8:
574       DCHECK_EQ(16u, instruction->GetVectorLength());
575       __ Uqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
576       break;
577     case DataType::Type::kInt8:
578       DCHECK_EQ(16u, instruction->GetVectorLength());
579       __ Sqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
580       break;
581     case DataType::Type::kUint16:
582       DCHECK_EQ(8u, instruction->GetVectorLength());
583       __ Uqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
584       break;
585     case DataType::Type::kInt16:
586       DCHECK_EQ(8u, instruction->GetVectorLength());
587       __ Sqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
588       break;
589     default:
590       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
591       UNREACHABLE();
592   }
593 }
594 
VisitVecMul(HVecMul * instruction)595 void LocationsBuilderARM64::VisitVecMul(HVecMul* instruction) {
596   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
597 }
598 
VisitVecMul(HVecMul * instruction)599 void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) {
600   LocationSummary* locations = instruction->GetLocations();
601   VRegister lhs = VRegisterFrom(locations->InAt(0));
602   VRegister rhs = VRegisterFrom(locations->InAt(1));
603   VRegister dst = VRegisterFrom(locations->Out());
604   switch (instruction->GetPackedType()) {
605     case DataType::Type::kUint8:
606     case DataType::Type::kInt8:
607       DCHECK_EQ(16u, instruction->GetVectorLength());
608       __ Mul(dst.V16B(), lhs.V16B(), rhs.V16B());
609       break;
610     case DataType::Type::kUint16:
611     case DataType::Type::kInt16:
612       DCHECK_EQ(8u, instruction->GetVectorLength());
613       __ Mul(dst.V8H(), lhs.V8H(), rhs.V8H());
614       break;
615     case DataType::Type::kInt32:
616       DCHECK_EQ(4u, instruction->GetVectorLength());
617       __ Mul(dst.V4S(), lhs.V4S(), rhs.V4S());
618       break;
619     case DataType::Type::kFloat32:
620       DCHECK_EQ(4u, instruction->GetVectorLength());
621       __ Fmul(dst.V4S(), lhs.V4S(), rhs.V4S());
622       break;
623     case DataType::Type::kFloat64:
624       DCHECK_EQ(2u, instruction->GetVectorLength());
625       __ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D());
626       break;
627     default:
628       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
629       UNREACHABLE();
630   }
631 }
632 
VisitVecDiv(HVecDiv * instruction)633 void LocationsBuilderARM64::VisitVecDiv(HVecDiv* instruction) {
634   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
635 }
636 
VisitVecDiv(HVecDiv * instruction)637 void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) {
638   LocationSummary* locations = instruction->GetLocations();
639   VRegister lhs = VRegisterFrom(locations->InAt(0));
640   VRegister rhs = VRegisterFrom(locations->InAt(1));
641   VRegister dst = VRegisterFrom(locations->Out());
642   switch (instruction->GetPackedType()) {
643     case DataType::Type::kFloat32:
644       DCHECK_EQ(4u, instruction->GetVectorLength());
645       __ Fdiv(dst.V4S(), lhs.V4S(), rhs.V4S());
646       break;
647     case DataType::Type::kFloat64:
648       DCHECK_EQ(2u, instruction->GetVectorLength());
649       __ Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D());
650       break;
651     default:
652       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
653       UNREACHABLE();
654   }
655 }
656 
VisitVecMin(HVecMin * instruction)657 void LocationsBuilderARM64::VisitVecMin(HVecMin* instruction) {
658   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
659 }
660 
VisitVecMin(HVecMin * instruction)661 void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
662   LocationSummary* locations = instruction->GetLocations();
663   VRegister lhs = VRegisterFrom(locations->InAt(0));
664   VRegister rhs = VRegisterFrom(locations->InAt(1));
665   VRegister dst = VRegisterFrom(locations->Out());
666   switch (instruction->GetPackedType()) {
667     case DataType::Type::kUint8:
668       DCHECK_EQ(16u, instruction->GetVectorLength());
669       __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B());
670       break;
671     case DataType::Type::kInt8:
672       DCHECK_EQ(16u, instruction->GetVectorLength());
673       __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B());
674       break;
675     case DataType::Type::kUint16:
676       DCHECK_EQ(8u, instruction->GetVectorLength());
677       __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H());
678       break;
679     case DataType::Type::kInt16:
680       DCHECK_EQ(8u, instruction->GetVectorLength());
681       __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
682       break;
683     case DataType::Type::kUint32:
684       DCHECK_EQ(4u, instruction->GetVectorLength());
685       __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S());
686       break;
687     case DataType::Type::kInt32:
688       DCHECK_EQ(4u, instruction->GetVectorLength());
689       __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S());
690       break;
691     case DataType::Type::kFloat32:
692       DCHECK_EQ(4u, instruction->GetVectorLength());
693       __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S());
694       break;
695     case DataType::Type::kFloat64:
696       DCHECK_EQ(2u, instruction->GetVectorLength());
697       __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
698       break;
699     default:
700       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
701       UNREACHABLE();
702   }
703 }
704 
VisitVecMax(HVecMax * instruction)705 void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) {
706   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
707 }
708 
VisitVecMax(HVecMax * instruction)709 void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
710   LocationSummary* locations = instruction->GetLocations();
711   VRegister lhs = VRegisterFrom(locations->InAt(0));
712   VRegister rhs = VRegisterFrom(locations->InAt(1));
713   VRegister dst = VRegisterFrom(locations->Out());
714   switch (instruction->GetPackedType()) {
715     case DataType::Type::kUint8:
716       DCHECK_EQ(16u, instruction->GetVectorLength());
717       __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B());
718       break;
719     case DataType::Type::kInt8:
720       DCHECK_EQ(16u, instruction->GetVectorLength());
721       __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B());
722       break;
723     case DataType::Type::kUint16:
724       DCHECK_EQ(8u, instruction->GetVectorLength());
725       __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H());
726       break;
727     case DataType::Type::kInt16:
728       DCHECK_EQ(8u, instruction->GetVectorLength());
729       __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
730       break;
731     case DataType::Type::kUint32:
732       DCHECK_EQ(4u, instruction->GetVectorLength());
733       __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S());
734       break;
735     case DataType::Type::kInt32:
736       DCHECK_EQ(4u, instruction->GetVectorLength());
737       __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S());
738       break;
739     case DataType::Type::kFloat32:
740       DCHECK_EQ(4u, instruction->GetVectorLength());
741       __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S());
742       break;
743     case DataType::Type::kFloat64:
744       DCHECK_EQ(2u, instruction->GetVectorLength());
745       __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
746       break;
747     default:
748       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
749       UNREACHABLE();
750   }
751 }
752 
VisitVecAnd(HVecAnd * instruction)753 void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
754   // TODO: Allow constants supported by BIC (vector, immediate).
755   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
756 }
757 
VisitVecAnd(HVecAnd * instruction)758 void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) {
759   LocationSummary* locations = instruction->GetLocations();
760   VRegister lhs = VRegisterFrom(locations->InAt(0));
761   VRegister rhs = VRegisterFrom(locations->InAt(1));
762   VRegister dst = VRegisterFrom(locations->Out());
763   switch (instruction->GetPackedType()) {
764     case DataType::Type::kBool:
765     case DataType::Type::kUint8:
766     case DataType::Type::kInt8:
767     case DataType::Type::kUint16:
768     case DataType::Type::kInt16:
769     case DataType::Type::kInt32:
770     case DataType::Type::kInt64:
771     case DataType::Type::kFloat32:
772     case DataType::Type::kFloat64:
773       __ And(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
774       break;
775     default:
776       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
777       UNREACHABLE();
778   }
779 }
780 
VisitVecAndNot(HVecAndNot * instruction)781 void LocationsBuilderARM64::VisitVecAndNot(HVecAndNot* instruction) {
782   LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
783 }
784 
VisitVecAndNot(HVecAndNot * instruction)785 void InstructionCodeGeneratorARM64::VisitVecAndNot(HVecAndNot* instruction) {
786   // TODO: Use BIC (vector, register).
787   LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
788 }
789 
VisitVecOr(HVecOr * instruction)790 void LocationsBuilderARM64::VisitVecOr(HVecOr* instruction) {
791   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
792 }
793 
VisitVecOr(HVecOr * instruction)794 void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) {
795   LocationSummary* locations = instruction->GetLocations();
796   VRegister lhs = VRegisterFrom(locations->InAt(0));
797   VRegister rhs = VRegisterFrom(locations->InAt(1));
798   VRegister dst = VRegisterFrom(locations->Out());
799   switch (instruction->GetPackedType()) {
800     case DataType::Type::kBool:
801     case DataType::Type::kUint8:
802     case DataType::Type::kInt8:
803     case DataType::Type::kUint16:
804     case DataType::Type::kInt16:
805     case DataType::Type::kInt32:
806     case DataType::Type::kInt64:
807     case DataType::Type::kFloat32:
808     case DataType::Type::kFloat64:
809       __ Orr(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
810       break;
811     default:
812       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
813       UNREACHABLE();
814   }
815 }
816 
VisitVecXor(HVecXor * instruction)817 void LocationsBuilderARM64::VisitVecXor(HVecXor* instruction) {
818   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
819 }
820 
VisitVecXor(HVecXor * instruction)821 void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) {
822   LocationSummary* locations = instruction->GetLocations();
823   VRegister lhs = VRegisterFrom(locations->InAt(0));
824   VRegister rhs = VRegisterFrom(locations->InAt(1));
825   VRegister dst = VRegisterFrom(locations->Out());
826   switch (instruction->GetPackedType()) {
827     case DataType::Type::kBool:
828     case DataType::Type::kUint8:
829     case DataType::Type::kInt8:
830     case DataType::Type::kUint16:
831     case DataType::Type::kInt16:
832     case DataType::Type::kInt32:
833     case DataType::Type::kInt64:
834     case DataType::Type::kFloat32:
835     case DataType::Type::kFloat64:
836       __ Eor(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
837       break;
838     default:
839       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
840       UNREACHABLE();
841   }
842 }
843 
844 // Helper to set up locations for vector shift operations.
CreateVecShiftLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)845 static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
846   LocationSummary* locations = new (allocator) LocationSummary(instruction);
847   switch (instruction->GetPackedType()) {
848     case DataType::Type::kUint8:
849     case DataType::Type::kInt8:
850     case DataType::Type::kUint16:
851     case DataType::Type::kInt16:
852     case DataType::Type::kInt32:
853     case DataType::Type::kInt64:
854       locations->SetInAt(0, Location::RequiresFpuRegister());
855       locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
856       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
857       break;
858     default:
859       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
860       UNREACHABLE();
861   }
862 }
863 
VisitVecShl(HVecShl * instruction)864 void LocationsBuilderARM64::VisitVecShl(HVecShl* instruction) {
865   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
866 }
867 
VisitVecShl(HVecShl * instruction)868 void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) {
869   LocationSummary* locations = instruction->GetLocations();
870   VRegister lhs = VRegisterFrom(locations->InAt(0));
871   VRegister dst = VRegisterFrom(locations->Out());
872   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
873   switch (instruction->GetPackedType()) {
874     case DataType::Type::kUint8:
875     case DataType::Type::kInt8:
876       DCHECK_EQ(16u, instruction->GetVectorLength());
877       __ Shl(dst.V16B(), lhs.V16B(), value);
878       break;
879     case DataType::Type::kUint16:
880     case DataType::Type::kInt16:
881       DCHECK_EQ(8u, instruction->GetVectorLength());
882       __ Shl(dst.V8H(), lhs.V8H(), value);
883       break;
884     case DataType::Type::kInt32:
885       DCHECK_EQ(4u, instruction->GetVectorLength());
886       __ Shl(dst.V4S(), lhs.V4S(), value);
887       break;
888     case DataType::Type::kInt64:
889       DCHECK_EQ(2u, instruction->GetVectorLength());
890       __ Shl(dst.V2D(), lhs.V2D(), value);
891       break;
892     default:
893       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
894       UNREACHABLE();
895   }
896 }
897 
VisitVecShr(HVecShr * instruction)898 void LocationsBuilderARM64::VisitVecShr(HVecShr* instruction) {
899   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
900 }
901 
VisitVecShr(HVecShr * instruction)902 void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) {
903   LocationSummary* locations = instruction->GetLocations();
904   VRegister lhs = VRegisterFrom(locations->InAt(0));
905   VRegister dst = VRegisterFrom(locations->Out());
906   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
907   switch (instruction->GetPackedType()) {
908     case DataType::Type::kUint8:
909     case DataType::Type::kInt8:
910       DCHECK_EQ(16u, instruction->GetVectorLength());
911       __ Sshr(dst.V16B(), lhs.V16B(), value);
912       break;
913     case DataType::Type::kUint16:
914     case DataType::Type::kInt16:
915       DCHECK_EQ(8u, instruction->GetVectorLength());
916       __ Sshr(dst.V8H(), lhs.V8H(), value);
917       break;
918     case DataType::Type::kInt32:
919       DCHECK_EQ(4u, instruction->GetVectorLength());
920       __ Sshr(dst.V4S(), lhs.V4S(), value);
921       break;
922     case DataType::Type::kInt64:
923       DCHECK_EQ(2u, instruction->GetVectorLength());
924       __ Sshr(dst.V2D(), lhs.V2D(), value);
925       break;
926     default:
927       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
928       UNREACHABLE();
929   }
930 }
931 
VisitVecUShr(HVecUShr * instruction)932 void LocationsBuilderARM64::VisitVecUShr(HVecUShr* instruction) {
933   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
934 }
935 
VisitVecUShr(HVecUShr * instruction)936 void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) {
937   LocationSummary* locations = instruction->GetLocations();
938   VRegister lhs = VRegisterFrom(locations->InAt(0));
939   VRegister dst = VRegisterFrom(locations->Out());
940   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
941   switch (instruction->GetPackedType()) {
942     case DataType::Type::kUint8:
943     case DataType::Type::kInt8:
944       DCHECK_EQ(16u, instruction->GetVectorLength());
945       __ Ushr(dst.V16B(), lhs.V16B(), value);
946       break;
947     case DataType::Type::kUint16:
948     case DataType::Type::kInt16:
949       DCHECK_EQ(8u, instruction->GetVectorLength());
950       __ Ushr(dst.V8H(), lhs.V8H(), value);
951       break;
952     case DataType::Type::kInt32:
953       DCHECK_EQ(4u, instruction->GetVectorLength());
954       __ Ushr(dst.V4S(), lhs.V4S(), value);
955       break;
956     case DataType::Type::kInt64:
957       DCHECK_EQ(2u, instruction->GetVectorLength());
958       __ Ushr(dst.V2D(), lhs.V2D(), value);
959       break;
960     default:
961       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
962       UNREACHABLE();
963   }
964 }
965 
VisitVecSetScalars(HVecSetScalars * instruction)966 void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
967   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
968 
969   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
970 
971   HInstruction* input = instruction->InputAt(0);
972   bool is_zero = IsZeroBitPattern(input);
973 
974   switch (instruction->GetPackedType()) {
975     case DataType::Type::kBool:
976     case DataType::Type::kUint8:
977     case DataType::Type::kInt8:
978     case DataType::Type::kUint16:
979     case DataType::Type::kInt16:
980     case DataType::Type::kInt32:
981     case DataType::Type::kInt64:
982       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
983                                     : Location::RequiresRegister());
984       locations->SetOut(Location::RequiresFpuRegister());
985       break;
986     case DataType::Type::kFloat32:
987     case DataType::Type::kFloat64:
988       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
989                                     : Location::RequiresFpuRegister());
990       locations->SetOut(Location::RequiresFpuRegister());
991       break;
992     default:
993       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
994       UNREACHABLE();
995   }
996 }
997 
VisitVecSetScalars(HVecSetScalars * instruction)998 void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
999   LocationSummary* locations = instruction->GetLocations();
1000   VRegister dst = VRegisterFrom(locations->Out());
1001 
1002   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
1003 
1004   // Zero out all other elements first.
1005   __ Movi(dst.V16B(), 0);
1006 
1007   // Shorthand for any type of zero.
1008   if (IsZeroBitPattern(instruction->InputAt(0))) {
1009     return;
1010   }
1011 
1012   // Set required elements.
1013   switch (instruction->GetPackedType()) {
1014     case DataType::Type::kBool:
1015     case DataType::Type::kUint8:
1016     case DataType::Type::kInt8:
1017       DCHECK_EQ(16u, instruction->GetVectorLength());
1018       __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
1019       break;
1020     case DataType::Type::kUint16:
1021     case DataType::Type::kInt16:
1022       DCHECK_EQ(8u, instruction->GetVectorLength());
1023       __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
1024       break;
1025     case DataType::Type::kInt32:
1026       DCHECK_EQ(4u, instruction->GetVectorLength());
1027       __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
1028       break;
1029     case DataType::Type::kInt64:
1030       DCHECK_EQ(2u, instruction->GetVectorLength());
1031       __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
1032       break;
1033     default:
1034       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1035       UNREACHABLE();
1036   }
1037 }
1038 
1039 // Helper to set up locations for vector accumulations.
CreateVecAccumLocations(ArenaAllocator * allocator,HVecOperation * instruction)1040 static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
1041   LocationSummary* locations = new (allocator) LocationSummary(instruction);
1042   switch (instruction->GetPackedType()) {
1043     case DataType::Type::kUint8:
1044     case DataType::Type::kInt8:
1045     case DataType::Type::kUint16:
1046     case DataType::Type::kInt16:
1047     case DataType::Type::kInt32:
1048     case DataType::Type::kInt64:
1049       locations->SetInAt(0, Location::RequiresFpuRegister());
1050       locations->SetInAt(1, Location::RequiresFpuRegister());
1051       locations->SetInAt(2, Location::RequiresFpuRegister());
1052       locations->SetOut(Location::SameAsFirstInput());
1053       break;
1054     default:
1055       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1056       UNREACHABLE();
1057   }
1058 }
1059 
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1060 void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1061   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1062 }
1063 
1064 // Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
1065 // 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
1066 // However vector MultiplyAccumulate instruction is not affected.
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1067 void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1068   LocationSummary* locations = instruction->GetLocations();
1069   VRegister acc = VRegisterFrom(locations->InAt(0));
1070   VRegister left = VRegisterFrom(locations->InAt(1));
1071   VRegister right = VRegisterFrom(locations->InAt(2));
1072 
1073   DCHECK(locations->InAt(0).Equals(locations->Out()));
1074 
1075   switch (instruction->GetPackedType()) {
1076     case DataType::Type::kUint8:
1077     case DataType::Type::kInt8:
1078       DCHECK_EQ(16u, instruction->GetVectorLength());
1079       if (instruction->GetOpKind() == HInstruction::kAdd) {
1080         __ Mla(acc.V16B(), left.V16B(), right.V16B());
1081       } else {
1082         __ Mls(acc.V16B(), left.V16B(), right.V16B());
1083       }
1084       break;
1085     case DataType::Type::kUint16:
1086     case DataType::Type::kInt16:
1087       DCHECK_EQ(8u, instruction->GetVectorLength());
1088       if (instruction->GetOpKind() == HInstruction::kAdd) {
1089         __ Mla(acc.V8H(), left.V8H(), right.V8H());
1090       } else {
1091         __ Mls(acc.V8H(), left.V8H(), right.V8H());
1092       }
1093       break;
1094     case DataType::Type::kInt32:
1095       DCHECK_EQ(4u, instruction->GetVectorLength());
1096       if (instruction->GetOpKind() == HInstruction::kAdd) {
1097         __ Mla(acc.V4S(), left.V4S(), right.V4S());
1098       } else {
1099         __ Mls(acc.V4S(), left.V4S(), right.V4S());
1100       }
1101       break;
1102     default:
1103       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1104       UNREACHABLE();
1105   }
1106 }
1107 
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1108 void LocationsBuilderARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1109   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1110   // Some conversions require temporary registers.
1111   LocationSummary* locations = instruction->GetLocations();
1112   HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
1113   HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
1114   DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
1115             HVecOperation::ToSignedType(b->GetPackedType()));
1116   switch (a->GetPackedType()) {
1117     case DataType::Type::kUint8:
1118     case DataType::Type::kInt8:
1119       switch (instruction->GetPackedType()) {
1120         case DataType::Type::kInt64:
1121           locations->AddTemp(Location::RequiresFpuRegister());
1122           locations->AddTemp(Location::RequiresFpuRegister());
1123           FALLTHROUGH_INTENDED;
1124         case DataType::Type::kInt32:
1125           locations->AddTemp(Location::RequiresFpuRegister());
1126           locations->AddTemp(Location::RequiresFpuRegister());
1127           break;
1128         default:
1129           break;
1130       }
1131       break;
1132     case DataType::Type::kUint16:
1133     case DataType::Type::kInt16:
1134       if (instruction->GetPackedType() == DataType::Type::kInt64) {
1135         locations->AddTemp(Location::RequiresFpuRegister());
1136         locations->AddTemp(Location::RequiresFpuRegister());
1137       }
1138       break;
1139     case DataType::Type::kInt32:
1140     case DataType::Type::kInt64:
1141       if (instruction->GetPackedType() == a->GetPackedType()) {
1142         locations->AddTemp(Location::RequiresFpuRegister());
1143       }
1144       break;
1145     default:
1146       break;
1147   }
1148 }
1149 
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1150 void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1151   LocationSummary* locations = instruction->GetLocations();
1152   VRegister acc = VRegisterFrom(locations->InAt(0));
1153   VRegister left = VRegisterFrom(locations->InAt(1));
1154   VRegister right = VRegisterFrom(locations->InAt(2));
1155 
1156   DCHECK(locations->InAt(0).Equals(locations->Out()));
1157 
1158   // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S).
1159   HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
1160   HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
1161   DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
1162             HVecOperation::ToSignedType(b->GetPackedType()));
1163   switch (a->GetPackedType()) {
1164     case DataType::Type::kUint8:
1165     case DataType::Type::kInt8:
1166       DCHECK_EQ(16u, a->GetVectorLength());
1167       switch (instruction->GetPackedType()) {
1168         case DataType::Type::kInt16:
1169           DCHECK_EQ(8u, instruction->GetVectorLength());
1170           __ Sabal(acc.V8H(), left.V8B(), right.V8B());
1171           __ Sabal2(acc.V8H(), left.V16B(), right.V16B());
1172           break;
1173         case DataType::Type::kInt32: {
1174           DCHECK_EQ(4u, instruction->GetVectorLength());
1175           VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
1176           VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
1177           __ Sxtl(tmp1.V8H(), left.V8B());
1178           __ Sxtl(tmp2.V8H(), right.V8B());
1179           __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
1180           __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
1181           __ Sxtl2(tmp1.V8H(), left.V16B());
1182           __ Sxtl2(tmp2.V8H(), right.V16B());
1183           __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
1184           __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
1185           break;
1186         }
1187         case DataType::Type::kInt64: {
1188           DCHECK_EQ(2u, instruction->GetVectorLength());
1189           VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
1190           VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
1191           VRegister tmp3 = VRegisterFrom(locations->GetTemp(2));
1192           VRegister tmp4 = VRegisterFrom(locations->GetTemp(3));
1193           __ Sxtl(tmp1.V8H(), left.V8B());
1194           __ Sxtl(tmp2.V8H(), right.V8B());
1195           __ Sxtl(tmp3.V4S(), tmp1.V4H());
1196           __ Sxtl(tmp4.V4S(), tmp2.V4H());
1197           __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
1198           __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
1199           __ Sxtl2(tmp3.V4S(), tmp1.V8H());
1200           __ Sxtl2(tmp4.V4S(), tmp2.V8H());
1201           __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
1202           __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
1203           __ Sxtl2(tmp1.V8H(), left.V16B());
1204           __ Sxtl2(tmp2.V8H(), right.V16B());
1205           __ Sxtl(tmp3.V4S(), tmp1.V4H());
1206           __ Sxtl(tmp4.V4S(), tmp2.V4H());
1207           __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
1208           __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
1209           __ Sxtl2(tmp3.V4S(), tmp1.V8H());
1210           __ Sxtl2(tmp4.V4S(), tmp2.V8H());
1211           __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
1212           __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
1213           break;
1214         }
1215         default:
1216           LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1217           UNREACHABLE();
1218       }
1219       break;
1220     case DataType::Type::kUint16:
1221     case DataType::Type::kInt16:
1222       DCHECK_EQ(8u, a->GetVectorLength());
1223       switch (instruction->GetPackedType()) {
1224         case DataType::Type::kInt32:
1225           DCHECK_EQ(4u, instruction->GetVectorLength());
1226           __ Sabal(acc.V4S(), left.V4H(), right.V4H());
1227           __ Sabal2(acc.V4S(), left.V8H(), right.V8H());
1228           break;
1229         case DataType::Type::kInt64: {
1230           DCHECK_EQ(2u, instruction->GetVectorLength());
1231           VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
1232           VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
1233           __ Sxtl(tmp1.V4S(), left.V4H());
1234           __ Sxtl(tmp2.V4S(), right.V4H());
1235           __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
1236           __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
1237           __ Sxtl2(tmp1.V4S(), left.V8H());
1238           __ Sxtl2(tmp2.V4S(), right.V8H());
1239           __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
1240           __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
1241           break;
1242         }
1243         default:
1244           LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1245           UNREACHABLE();
1246       }
1247       break;
1248     case DataType::Type::kInt32:
1249       DCHECK_EQ(4u, a->GetVectorLength());
1250       switch (instruction->GetPackedType()) {
1251         case DataType::Type::kInt32: {
1252           DCHECK_EQ(4u, instruction->GetVectorLength());
1253           VRegister tmp = VRegisterFrom(locations->GetTemp(0));
1254           __ Sub(tmp.V4S(), left.V4S(), right.V4S());
1255           __ Abs(tmp.V4S(), tmp.V4S());
1256           __ Add(acc.V4S(), acc.V4S(), tmp.V4S());
1257           break;
1258         }
1259         case DataType::Type::kInt64:
1260           DCHECK_EQ(2u, instruction->GetVectorLength());
1261           __ Sabal(acc.V2D(), left.V2S(), right.V2S());
1262           __ Sabal2(acc.V2D(), left.V4S(), right.V4S());
1263           break;
1264         default:
1265           LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1266           UNREACHABLE();
1267       }
1268       break;
1269     case DataType::Type::kInt64:
1270       DCHECK_EQ(2u, a->GetVectorLength());
1271       switch (instruction->GetPackedType()) {
1272         case DataType::Type::kInt64: {
1273           DCHECK_EQ(2u, instruction->GetVectorLength());
1274           VRegister tmp = VRegisterFrom(locations->GetTemp(0));
1275           __ Sub(tmp.V2D(), left.V2D(), right.V2D());
1276           __ Abs(tmp.V2D(), tmp.V2D());
1277           __ Add(acc.V2D(), acc.V2D(), tmp.V2D());
1278           break;
1279         }
1280         default:
1281           LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1282           UNREACHABLE();
1283       }
1284       break;
1285     default:
1286       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1287   }
1288 }
1289 
VisitVecDotProd(HVecDotProd * instruction)1290 void LocationsBuilderARM64::VisitVecDotProd(HVecDotProd* instruction) {
1291   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
1292   DCHECK(instruction->GetPackedType() == DataType::Type::kInt32);
1293   locations->SetInAt(0, Location::RequiresFpuRegister());
1294   locations->SetInAt(1, Location::RequiresFpuRegister());
1295   locations->SetInAt(2, Location::RequiresFpuRegister());
1296   locations->SetOut(Location::SameAsFirstInput());
1297 
1298   // For Int8 and Uint8 general case we need a temp register.
1299   if ((DataType::Size(instruction->InputAt(1)->AsVecOperation()->GetPackedType()) == 1) &&
1300       !ShouldEmitDotProductInstructions(codegen_)) {
1301     locations->AddTemp(Location::RequiresFpuRegister());
1302   }
1303 }
1304 
VisitVecDotProd(HVecDotProd * instruction)1305 void InstructionCodeGeneratorARM64::VisitVecDotProd(HVecDotProd* instruction) {
1306   LocationSummary* locations = instruction->GetLocations();
1307   DCHECK(locations->InAt(0).Equals(locations->Out()));
1308   VRegister acc = VRegisterFrom(locations->InAt(0));
1309   VRegister left = VRegisterFrom(locations->InAt(1));
1310   VRegister right = VRegisterFrom(locations->InAt(2));
1311   HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
1312   HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
1313   DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
1314             HVecOperation::ToSignedType(b->GetPackedType()));
1315   DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32);
1316   DCHECK_EQ(4u, instruction->GetVectorLength());
1317 
1318   size_t inputs_data_size = DataType::Size(a->GetPackedType());
1319   switch (inputs_data_size) {
1320     case 1u: {
1321       DCHECK_EQ(16u, a->GetVectorLength());
1322       if (instruction->IsZeroExtending()) {
1323         if (ShouldEmitDotProductInstructions(codegen_)) {
1324           __ Udot(acc.V4S(), left.V16B(), right.V16B());
1325         } else {
1326           VRegister tmp = VRegisterFrom(locations->GetTemp(0));
1327           __ Umull(tmp.V8H(), left.V8B(), right.V8B());
1328           __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
1329           __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
1330 
1331           __ Umull2(tmp.V8H(), left.V16B(), right.V16B());
1332           __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
1333           __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
1334         }
1335       } else {
1336         if (ShouldEmitDotProductInstructions(codegen_)) {
1337           __ Sdot(acc.V4S(), left.V16B(), right.V16B());
1338         } else {
1339           VRegister tmp = VRegisterFrom(locations->GetTemp(0));
1340           __ Smull(tmp.V8H(), left.V8B(), right.V8B());
1341           __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
1342           __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
1343 
1344           __ Smull2(tmp.V8H(), left.V16B(), right.V16B());
1345           __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
1346           __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
1347         }
1348       }
1349       break;
1350     }
1351     case 2u:
1352       DCHECK_EQ(8u, a->GetVectorLength());
1353       if (instruction->IsZeroExtending()) {
1354         __ Umlal(acc.V4S(), left.V4H(), right.V4H());
1355         __ Umlal2(acc.V4S(), left.V8H(), right.V8H());
1356       } else {
1357         __ Smlal(acc.V4S(), left.V4H(), right.V4H());
1358         __ Smlal2(acc.V4S(), left.V8H(), right.V8H());
1359       }
1360       break;
1361     default:
1362       LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size;
1363   }
1364 }
1365 
1366 // Helper to set up locations for vector memory operations.
CreateVecMemLocations(ArenaAllocator * allocator,HVecMemoryOperation * instruction,bool is_load)1367 static void CreateVecMemLocations(ArenaAllocator* allocator,
1368                                   HVecMemoryOperation* instruction,
1369                                   bool is_load) {
1370   LocationSummary* locations = new (allocator) LocationSummary(instruction);
1371   switch (instruction->GetPackedType()) {
1372     case DataType::Type::kBool:
1373     case DataType::Type::kUint8:
1374     case DataType::Type::kInt8:
1375     case DataType::Type::kUint16:
1376     case DataType::Type::kInt16:
1377     case DataType::Type::kInt32:
1378     case DataType::Type::kInt64:
1379     case DataType::Type::kFloat32:
1380     case DataType::Type::kFloat64:
1381       locations->SetInAt(0, Location::RequiresRegister());
1382       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
1383       if (is_load) {
1384         locations->SetOut(Location::RequiresFpuRegister());
1385       } else {
1386         locations->SetInAt(2, Location::RequiresFpuRegister());
1387       }
1388       break;
1389     default:
1390       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1391       UNREACHABLE();
1392   }
1393 }
1394 
1395 // Helper to set up locations for vector memory operations. Returns the memory operand and,
1396 // if used, sets the output parameter scratch to a temporary register used in this operand,
1397 // so that the client can release it right after the memory operand use.
VecAddress(HVecMemoryOperation * instruction,UseScratchRegisterScope * temps_scope,size_t size,bool is_string_char_at,Register * scratch)1398 MemOperand InstructionCodeGeneratorARM64::VecAddress(
1399     HVecMemoryOperation* instruction,
1400     UseScratchRegisterScope* temps_scope,
1401     size_t size,
1402     bool is_string_char_at,
1403     /*out*/ Register* scratch) {
1404   LocationSummary* locations = instruction->GetLocations();
1405   Register base = InputRegisterAt(instruction, 0);
1406 
1407   if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
1408     DCHECK(!is_string_char_at);
1409     return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
1410   }
1411 
1412   Location index = locations->InAt(1);
1413   uint32_t offset = is_string_char_at
1414       ? mirror::String::ValueOffset().Uint32Value()
1415       : mirror::Array::DataOffset(size).Uint32Value();
1416   size_t shift = ComponentSizeShiftWidth(size);
1417 
1418   // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
1419   DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
1420 
1421   if (index.IsConstant()) {
1422     offset += Int64FromLocation(index) << shift;
1423     return HeapOperand(base, offset);
1424   } else {
1425     *scratch = temps_scope->AcquireSameSizeAs(base);
1426     __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
1427     return HeapOperand(*scratch, offset);
1428   }
1429 }
1430 
VisitVecLoad(HVecLoad * instruction)1431 void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) {
1432   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
1433 }
1434 
VisitVecLoad(HVecLoad * instruction)1435 void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
1436   LocationSummary* locations = instruction->GetLocations();
1437   size_t size = DataType::Size(instruction->GetPackedType());
1438   VRegister reg = VRegisterFrom(locations->Out());
1439   UseScratchRegisterScope temps(GetVIXLAssembler());
1440   Register scratch;
1441 
1442   switch (instruction->GetPackedType()) {
1443     case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
1444     case DataType::Type::kUint16:
1445       DCHECK_EQ(8u, instruction->GetVectorLength());
1446       // Special handling of compressed/uncompressed string load.
1447       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
1448         vixl::aarch64::Label uncompressed_load, done;
1449         // Test compression bit.
1450         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1451                       "Expecting 0=compressed, 1=uncompressed");
1452         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1453         Register length = temps.AcquireW();
1454         __ Ldr(length, HeapOperand(InputRegisterAt(instruction, 0), count_offset));
1455         __ Tbnz(length.W(), 0, &uncompressed_load);
1456         temps.Release(length);  // no longer needed
1457         // Zero extend 8 compressed bytes into 8 chars.
1458         __ Ldr(DRegisterFrom(locations->Out()).V8B(),
1459                VecAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
1460         __ Uxtl(reg.V8H(), reg.V8B());
1461         __ B(&done);
1462         if (scratch.IsValid()) {
1463           temps.Release(scratch);  // if used, no longer needed
1464         }
1465         // Load 8 direct uncompressed chars.
1466         __ Bind(&uncompressed_load);
1467         __ Ldr(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
1468         __ Bind(&done);
1469         return;
1470       }
1471       FALLTHROUGH_INTENDED;
1472     case DataType::Type::kBool:
1473     case DataType::Type::kUint8:
1474     case DataType::Type::kInt8:
1475     case DataType::Type::kInt32:
1476     case DataType::Type::kFloat32:
1477     case DataType::Type::kInt64:
1478     case DataType::Type::kFloat64:
1479       DCHECK_LE(2u, instruction->GetVectorLength());
1480       DCHECK_LE(instruction->GetVectorLength(), 16u);
1481       __ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
1482       break;
1483     default:
1484       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1485       UNREACHABLE();
1486   }
1487 }
1488 
VisitVecStore(HVecStore * instruction)1489 void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) {
1490   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
1491 }
1492 
VisitVecStore(HVecStore * instruction)1493 void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
1494   LocationSummary* locations = instruction->GetLocations();
1495   size_t size = DataType::Size(instruction->GetPackedType());
1496   VRegister reg = VRegisterFrom(locations->InAt(2));
1497   UseScratchRegisterScope temps(GetVIXLAssembler());
1498   Register scratch;
1499 
1500   switch (instruction->GetPackedType()) {
1501     case DataType::Type::kBool:
1502     case DataType::Type::kUint8:
1503     case DataType::Type::kInt8:
1504     case DataType::Type::kUint16:
1505     case DataType::Type::kInt16:
1506     case DataType::Type::kInt32:
1507     case DataType::Type::kFloat32:
1508     case DataType::Type::kInt64:
1509     case DataType::Type::kFloat64:
1510       DCHECK_LE(2u, instruction->GetVectorLength());
1511       DCHECK_LE(instruction->GetVectorLength(), 16u);
1512       __ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
1513       break;
1514     default:
1515       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1516       UNREACHABLE();
1517   }
1518 }
1519 
1520 #undef __
1521 
1522 }  // namespace arm64
1523 }  // namespace art
1524