1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_arm64.h"
18
19 #include "arch/arm64/instruction_set_features_arm64.h"
20 #include "base/bit_utils_iterator.h"
21 #include "mirror/array-inl.h"
22 #include "mirror/string.h"
23
24 using namespace vixl::aarch64; // NOLINT(build/namespaces)
25
26 namespace art HIDDEN {
27 namespace arm64 {
28
29 using helpers::DRegisterFrom;
30 using helpers::HeapOperand;
31 using helpers::InputRegisterAt;
32 using helpers::Int64FromLocation;
33 using helpers::LocationFrom;
34 using helpers::OutputRegister;
35 using helpers::QRegisterFrom;
36 using helpers::StackOperandFrom;
37 using helpers::VRegisterFrom;
38 using helpers::XRegisterFrom;
39
40 #define __ GetVIXLAssembler()->
41
42 // Returns whether the value of the constant can be directly encoded into the instruction as
43 // immediate.
NEONCanEncodeConstantAsImmediate(HConstant * constant,HInstruction * instr)44 inline bool NEONCanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
45 // TODO: Improve this when IsSIMDConstantEncodable method is implemented in VIXL.
46 if (instr->IsVecReplicateScalar()) {
47 if (constant->IsLongConstant()) {
48 return false;
49 } else if (constant->IsFloatConstant()) {
50 return vixl::aarch64::Assembler::IsImmFP32(constant->AsFloatConstant()->GetValue());
51 } else if (constant->IsDoubleConstant()) {
52 return vixl::aarch64::Assembler::IsImmFP64(constant->AsDoubleConstant()->GetValue());
53 }
54 int64_t value = CodeGenerator::GetInt64ValueOf(constant);
55 return IsUint<8>(value);
56 }
57 return false;
58 }
59
60 // Returns
61 // - constant location - if 'constant' is an actual constant and its value can be
62 // encoded into the instruction.
63 // - register location otherwise.
NEONEncodableConstantOrRegister(HInstruction * constant,HInstruction * instr)64 inline Location NEONEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) {
65 if (constant->IsConstant() && NEONCanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
66 return Location::ConstantLocation(constant);
67 }
68
69 return Location::RequiresRegister();
70 }
71
72 // Returns whether dot product instructions should be emitted.
static bool ShouldEmitDotProductInstructions(const CodeGeneratorARM64* codegen_) {
  // Dot-product (SDOT/UDOT) is an optional ISA feature; query the target features.
  return codegen_->GetInstructionSetFeatures().HasDotProd();
}
76
// Sets up locations for broadcasting a scalar into all lanes of a vector.
void LocationsBuilderARM64Neon::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  HInstruction* input = instruction->InputAt(0);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      // Integral input: use an encodable immediate when possible, else a core register.
      locations->SetInAt(0, NEONEncodableConstantOrRegister(input, instruction));
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      if (input->IsConstant() &&
          NEONCanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
        // FP constant encodable as FMOV immediate: no input register needed.
        locations->SetInAt(0, Location::ConstantLocation(input));
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        // FP value in a register: DUP reads lane 0 of the source, so the output
        // may share a register with the input (no overlap constraint).
        locations->SetInAt(0, Location::RequiresFpuRegister());
        locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
107
// Emits code broadcasting a scalar into all vector lanes:
// MOVI/FMOV for encodable constants, DUP otherwise.
void InstructionCodeGeneratorARM64Neon::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  Location src_loc = locations->InAt(0);
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V16B(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V8H(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V4S(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V2D(), Int64FromLocation(src_loc));
      } else {
        // 64-bit lanes broadcast from an X (64-bit core) register.
        __ Dup(dst.V2D(), XRegisterFrom(src_loc));
      }
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Fmov(dst.V4S(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
      } else {
        // Broadcast lane 0 of the source FP register.
        __ Dup(dst.V4S(), VRegisterFrom(src_loc).V4S(), 0);
      }
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Fmov(dst.V2D(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
      } else {
        __ Dup(dst.V2D(), VRegisterFrom(src_loc).V2D(), 0);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
169
// Sets up locations for extracting lane 0 of a vector as a scalar.
void LocationsBuilderARM64Neon::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      // Integral result lives in a core register (UMOV from the vector).
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      // FP scalar is lane 0 of the same FP register: alias input and output
      // so no move is needed at codegen time.
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
193
// Emits code extracting lane 0 of a vector into a scalar register.
void InstructionCodeGeneratorARM64Neon::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      // UMOV copies lane 0 to the core output register.
      __ Umov(OutputRegister(instruction), src.V4S(), 0);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Umov(OutputRegister(instruction), src.V2D(), 0);
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 4u);
      // Locations aliased input and output, so lane 0 is already in place.
      DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
217
218 // Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      // Boolean-not writes its output (MOVI) before reading the input (EOR),
      // so the output must not share a register with the input.
      locations->SetOut(Location::RequiresFpuRegister(),
                        instruction->IsVecNot() ? Location::kOutputOverlap
                                                : Location::kNoOutputOverlap);
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
244
void LocationsBuilderARM64Neon::VisitVecReduce(HVecReduce* instruction) {
  // Standard unary-op locations: FP register in, FP register out.
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}
248
// Emits an across-lanes reduction (sum/min/max) into a scalar lane.
void InstructionCodeGeneratorARM64Neon::VisitVecReduce(HVecReduce* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          __ Addv(dst.S(), src.V4S());
          break;
        case HVecReduce::kMin:
          __ Sminv(dst.S(), src.V4S());
          break;
        case HVecReduce::kMax:
          __ Smaxv(dst.S(), src.V4S());
          break;
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          // 64-bit lanes: pairwise add is the only across-lanes reduction available.
          __ Addp(dst.D(), src.V2D());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD min/max";
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
284
void LocationsBuilderARM64Neon::VisitVecCnv(HVecCnv* instruction) {
  // Standard unary-op locations: FP register in, FP register out.
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}
288
// Emits a lane-wise type conversion; only int32 -> float32 is supported here.
void InstructionCodeGeneratorARM64Neon::VisitVecCnv(HVecCnv* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  DataType::Type from = instruction->GetInputType();
  DataType::Type to = instruction->GetResultType();
  if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
    DCHECK_EQ(4u, instruction->GetVectorLength());
    // SCVTF: signed integer to floating-point, per lane.
    __ Scvtf(dst.V4S(), src.V4S());
  } else {
    LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
  }
}
302
void LocationsBuilderARM64Neon::VisitVecNeg(HVecNeg* instruction) {
  // Standard unary-op locations: FP register in, FP register out.
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}
306
// Emits lane-wise negation: NEG for integral lanes, FNEG for FP lanes.
void InstructionCodeGeneratorARM64Neon::VisitVecNeg(HVecNeg* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Neg(dst.V16B(), src.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Neg(dst.V8H(), src.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Neg(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Neg(dst.V2D(), src.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fneg(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fneg(dst.V2D(), src.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
343
void LocationsBuilderARM64Neon::VisitVecAbs(HVecAbs* instruction) {
  // Standard unary-op locations: FP register in, FP register out.
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}
347
// Emits lane-wise absolute value: ABS for signed integral lanes, FABS for FP lanes.
void InstructionCodeGeneratorARM64Neon::VisitVecAbs(HVecAbs* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Abs(dst.V16B(), src.V16B());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Abs(dst.V8H(), src.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Abs(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Abs(dst.V2D(), src.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fabs(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fabs(dst.V2D(), src.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
382
void LocationsBuilderARM64Neon::VisitVecNot(HVecNot* instruction) {
  // Standard unary-op locations; bool lanes get an output-overlap constraint
  // inside the helper because boolean-not writes dst before reading src.
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}
386
// Emits lane-wise logical not. Booleans (0/1) are flipped with XOR-against-1;
// integral lanes use a full bitwise NOT.
void InstructionCodeGeneratorARM64Neon::VisitVecNot(HVecNot* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:  // special case boolean-not
      DCHECK_EQ(16u, instruction->GetVectorLength());
      // dst is written before src is read, hence the output-overlap constraint
      // set up in CreateVecUnOpLocations.
      __ Movi(dst.V16B(), 1);
      __ Eor(dst.V16B(), dst.V16B(), src.V16B());
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      __ Not(dst.V16B(), src.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
410
411 // Helper to set up locations for vector binary operations.
static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      // Both operands and the result live in FP/SIMD registers; three-operand
      // NEON forms never clobber an input, so no output overlap is required.
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
433
void LocationsBuilderARM64Neon::VisitVecAdd(HVecAdd* instruction) {
  // Standard binary-op locations: two FP registers in, FP register out.
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
437
// Emits lane-wise addition: ADD for integral lanes, FADD for FP lanes.
void InstructionCodeGeneratorARM64Neon::VisitVecAdd(HVecAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Add(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Add(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Add(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Add(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fadd(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
475
void LocationsBuilderARM64Neon::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  // Standard binary-op locations: two FP registers in, FP register out.
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
479
// Emits lane-wise saturating addition: UQADD (unsigned) / SQADD (signed),
// for 8- and 16-bit lanes only.
void InstructionCodeGeneratorARM64Neon::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Uqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Uqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
507
void LocationsBuilderARM64Neon::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  // Standard binary-op locations: two FP registers in, FP register out.
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
511
// Emits lane-wise halving addition ((a + b) >> 1) for 8- and 16-bit lanes.
// The rounded variants (URHADD/SRHADD) add 1 before the shift.
void InstructionCodeGeneratorARM64Neon::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
          : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
          : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
          : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
          : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
547
void LocationsBuilderARM64Neon::VisitVecSub(HVecSub* instruction) {
  // Standard binary-op locations: two FP registers in, FP register out.
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
551
// Emits lane-wise subtraction: SUB for integral lanes, FSUB for FP lanes.
void InstructionCodeGeneratorARM64Neon::VisitVecSub(HVecSub* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sub(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sub(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Sub(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Sub(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fsub(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
589
void LocationsBuilderARM64Neon::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  // Standard binary-op locations: two FP registers in, FP register out.
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
593
// Emits lane-wise saturating subtraction: UQSUB (unsigned) / SQSUB (signed),
// for 8- and 16-bit lanes only.
void InstructionCodeGeneratorARM64Neon::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Uqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Uqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
621
void LocationsBuilderARM64Neon::VisitVecMul(HVecMul* instruction) {
  // Standard binary-op locations: two FP registers in, FP register out.
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
625
// Emits lane-wise multiplication: MUL for integral lanes, FMUL for FP lanes.
// Note: there is no 64-bit integer MUL in NEON, hence no kInt64 case.
void InstructionCodeGeneratorARM64Neon::VisitVecMul(HVecMul* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Mul(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Mul(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Mul(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmul(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
659
void LocationsBuilderARM64Neon::VisitVecDiv(HVecDiv* instruction) {
  // Standard binary-op locations: two FP registers in, FP register out.
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
663
// Emits lane-wise division (FDIV). Only FP lanes are supported: NEON has no
// integer vector division.
void InstructionCodeGeneratorARM64Neon::VisitVecDiv(HVecDiv* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fdiv(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
683
void LocationsBuilderARM64Neon::VisitVecMin(HVecMin* instruction) {
  // Standard binary-op locations: two FP registers in, FP register out.
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
687
// Emits lane-wise minimum: UMIN/SMIN depending on signedness, FMIN for FP lanes.
void InstructionCodeGeneratorARM64Neon::VisitVecMin(HVecMin* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
731
void LocationsBuilderARM64Neon::VisitVecMax(HVecMax* instruction) {
  // Standard binary-op locations: two FP registers in, FP register out.
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
735
// Emits lane-wise maximum: UMAX/SMAX depending on signedness, FMAX for FP lanes.
void InstructionCodeGeneratorARM64Neon::VisitVecMax(HVecMax* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
779
void LocationsBuilderARM64Neon::VisitVecAnd(HVecAnd* instruction) {
  // TODO: Allow constants supported by BIC (vector, immediate).
  // Standard binary-op locations: two FP registers in, FP register out.
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
784
// Emits bitwise AND; the operation is lane-agnostic, so one byte-wise form
// serves all packed types.
void InstructionCodeGeneratorARM64Neon::VisitVecAnd(HVecAnd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ And(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
807
void LocationsBuilderARM64Neon::VisitVecAndNot(HVecAndNot* instruction) {
  // And-not is not implemented for this backend.
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}
811
void InstructionCodeGeneratorARM64Neon::VisitVecAndNot(HVecAndNot* instruction) {
  // TODO: Use BIC (vector, register).
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}
816
VisitVecOr(HVecOr * instruction)817 void LocationsBuilderARM64Neon::VisitVecOr(HVecOr* instruction) {
818 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
819 }
820
VisitVecOr(HVecOr * instruction)821 void InstructionCodeGeneratorARM64Neon::VisitVecOr(HVecOr* instruction) {
822 LocationSummary* locations = instruction->GetLocations();
823 VRegister lhs = VRegisterFrom(locations->InAt(0));
824 VRegister rhs = VRegisterFrom(locations->InAt(1));
825 VRegister dst = VRegisterFrom(locations->Out());
826 switch (instruction->GetPackedType()) {
827 case DataType::Type::kBool:
828 case DataType::Type::kUint8:
829 case DataType::Type::kInt8:
830 case DataType::Type::kUint16:
831 case DataType::Type::kInt16:
832 case DataType::Type::kInt32:
833 case DataType::Type::kInt64:
834 case DataType::Type::kFloat32:
835 case DataType::Type::kFloat64:
836 __ Orr(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter
837 break;
838 default:
839 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
840 UNREACHABLE();
841 }
842 }
843
VisitVecXor(HVecXor * instruction)844 void LocationsBuilderARM64Neon::VisitVecXor(HVecXor* instruction) {
845 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
846 }
847
VisitVecXor(HVecXor * instruction)848 void InstructionCodeGeneratorARM64Neon::VisitVecXor(HVecXor* instruction) {
849 LocationSummary* locations = instruction->GetLocations();
850 VRegister lhs = VRegisterFrom(locations->InAt(0));
851 VRegister rhs = VRegisterFrom(locations->InAt(1));
852 VRegister dst = VRegisterFrom(locations->Out());
853 switch (instruction->GetPackedType()) {
854 case DataType::Type::kBool:
855 case DataType::Type::kUint8:
856 case DataType::Type::kInt8:
857 case DataType::Type::kUint16:
858 case DataType::Type::kInt16:
859 case DataType::Type::kInt32:
860 case DataType::Type::kInt64:
861 case DataType::Type::kFloat32:
862 case DataType::Type::kFloat64:
863 __ Eor(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter
864 break;
865 default:
866 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
867 UNREACHABLE();
868 }
869 }
870
871 // Helper to set up locations for vector shift operations.
CreateVecShiftLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)872 static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
873 LocationSummary* locations = new (allocator) LocationSummary(instruction);
874 switch (instruction->GetPackedType()) {
875 case DataType::Type::kUint8:
876 case DataType::Type::kInt8:
877 case DataType::Type::kUint16:
878 case DataType::Type::kInt16:
879 case DataType::Type::kInt32:
880 case DataType::Type::kInt64:
881 locations->SetInAt(0, Location::RequiresFpuRegister());
882 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
883 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
884 break;
885 default:
886 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
887 UNREACHABLE();
888 }
889 }
890
VisitVecShl(HVecShl * instruction)891 void LocationsBuilderARM64Neon::VisitVecShl(HVecShl* instruction) {
892 CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
893 }
894
VisitVecShl(HVecShl * instruction)895 void InstructionCodeGeneratorARM64Neon::VisitVecShl(HVecShl* instruction) {
896 LocationSummary* locations = instruction->GetLocations();
897 VRegister lhs = VRegisterFrom(locations->InAt(0));
898 VRegister dst = VRegisterFrom(locations->Out());
899 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
900 switch (instruction->GetPackedType()) {
901 case DataType::Type::kUint8:
902 case DataType::Type::kInt8:
903 DCHECK_EQ(16u, instruction->GetVectorLength());
904 __ Shl(dst.V16B(), lhs.V16B(), value);
905 break;
906 case DataType::Type::kUint16:
907 case DataType::Type::kInt16:
908 DCHECK_EQ(8u, instruction->GetVectorLength());
909 __ Shl(dst.V8H(), lhs.V8H(), value);
910 break;
911 case DataType::Type::kInt32:
912 DCHECK_EQ(4u, instruction->GetVectorLength());
913 __ Shl(dst.V4S(), lhs.V4S(), value);
914 break;
915 case DataType::Type::kInt64:
916 DCHECK_EQ(2u, instruction->GetVectorLength());
917 __ Shl(dst.V2D(), lhs.V2D(), value);
918 break;
919 default:
920 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
921 UNREACHABLE();
922 }
923 }
924
VisitVecShr(HVecShr * instruction)925 void LocationsBuilderARM64Neon::VisitVecShr(HVecShr* instruction) {
926 CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
927 }
928
VisitVecShr(HVecShr * instruction)929 void InstructionCodeGeneratorARM64Neon::VisitVecShr(HVecShr* instruction) {
930 LocationSummary* locations = instruction->GetLocations();
931 VRegister lhs = VRegisterFrom(locations->InAt(0));
932 VRegister dst = VRegisterFrom(locations->Out());
933 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
934 switch (instruction->GetPackedType()) {
935 case DataType::Type::kUint8:
936 case DataType::Type::kInt8:
937 DCHECK_EQ(16u, instruction->GetVectorLength());
938 __ Sshr(dst.V16B(), lhs.V16B(), value);
939 break;
940 case DataType::Type::kUint16:
941 case DataType::Type::kInt16:
942 DCHECK_EQ(8u, instruction->GetVectorLength());
943 __ Sshr(dst.V8H(), lhs.V8H(), value);
944 break;
945 case DataType::Type::kInt32:
946 DCHECK_EQ(4u, instruction->GetVectorLength());
947 __ Sshr(dst.V4S(), lhs.V4S(), value);
948 break;
949 case DataType::Type::kInt64:
950 DCHECK_EQ(2u, instruction->GetVectorLength());
951 __ Sshr(dst.V2D(), lhs.V2D(), value);
952 break;
953 default:
954 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
955 UNREACHABLE();
956 }
957 }
958
VisitVecUShr(HVecUShr * instruction)959 void LocationsBuilderARM64Neon::VisitVecUShr(HVecUShr* instruction) {
960 CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
961 }
962
VisitVecUShr(HVecUShr * instruction)963 void InstructionCodeGeneratorARM64Neon::VisitVecUShr(HVecUShr* instruction) {
964 LocationSummary* locations = instruction->GetLocations();
965 VRegister lhs = VRegisterFrom(locations->InAt(0));
966 VRegister dst = VRegisterFrom(locations->Out());
967 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
968 switch (instruction->GetPackedType()) {
969 case DataType::Type::kUint8:
970 case DataType::Type::kInt8:
971 DCHECK_EQ(16u, instruction->GetVectorLength());
972 __ Ushr(dst.V16B(), lhs.V16B(), value);
973 break;
974 case DataType::Type::kUint16:
975 case DataType::Type::kInt16:
976 DCHECK_EQ(8u, instruction->GetVectorLength());
977 __ Ushr(dst.V8H(), lhs.V8H(), value);
978 break;
979 case DataType::Type::kInt32:
980 DCHECK_EQ(4u, instruction->GetVectorLength());
981 __ Ushr(dst.V4S(), lhs.V4S(), value);
982 break;
983 case DataType::Type::kInt64:
984 DCHECK_EQ(2u, instruction->GetVectorLength());
985 __ Ushr(dst.V2D(), lhs.V2D(), value);
986 break;
987 default:
988 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
989 UNREACHABLE();
990 }
991 }
992
VisitVecSetScalars(HVecSetScalars * instruction)993 void LocationsBuilderARM64Neon::VisitVecSetScalars(HVecSetScalars* instruction) {
994 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
995
996 DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
997
998 HInstruction* input = instruction->InputAt(0);
999 bool is_zero = IsZeroBitPattern(input);
1000
1001 switch (instruction->GetPackedType()) {
1002 case DataType::Type::kBool:
1003 case DataType::Type::kUint8:
1004 case DataType::Type::kInt8:
1005 case DataType::Type::kUint16:
1006 case DataType::Type::kInt16:
1007 case DataType::Type::kInt32:
1008 case DataType::Type::kInt64:
1009 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
1010 : Location::RequiresRegister());
1011 locations->SetOut(Location::RequiresFpuRegister());
1012 break;
1013 case DataType::Type::kFloat32:
1014 case DataType::Type::kFloat64:
1015 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
1016 : Location::RequiresFpuRegister());
1017 locations->SetOut(Location::RequiresFpuRegister());
1018 break;
1019 default:
1020 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1021 UNREACHABLE();
1022 }
1023 }
1024
// Materializes a vector whose lane 0 holds the (single) scalar input and whose
// remaining lanes are zero.
void InstructionCodeGeneratorARM64Neon::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister dst = VRegisterFrom(locations->Out());

  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented

  // Zero out all other elements first.
  __ Movi(dst.V16B(), 0);

  // Shorthand for any type of zero.
  if (IsZeroBitPattern(instruction->InputAt(0))) {
    return;
  }

  // Set required elements: insert the scalar into lane 0 of the
  // appropriately-shaped vector.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
1065
1066 // Helper to set up locations for vector accumulations.
CreateVecAccumLocations(ArenaAllocator * allocator,HVecOperation * instruction)1067 static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
1068 LocationSummary* locations = new (allocator) LocationSummary(instruction);
1069 switch (instruction->GetPackedType()) {
1070 case DataType::Type::kUint8:
1071 case DataType::Type::kInt8:
1072 case DataType::Type::kUint16:
1073 case DataType::Type::kInt16:
1074 case DataType::Type::kInt32:
1075 case DataType::Type::kInt64:
1076 locations->SetInAt(0, Location::RequiresFpuRegister());
1077 locations->SetInAt(1, Location::RequiresFpuRegister());
1078 locations->SetInAt(2, Location::RequiresFpuRegister());
1079 locations->SetOut(Location::SameAsFirstInput());
1080 break;
1081 default:
1082 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1083 UNREACHABLE();
1084 }
1085 }
1086
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1087 void LocationsBuilderARM64Neon::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1088 CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1089 }
1090
1091 // Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
1092 // 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
1093 // However vector MultiplyAccumulate instruction is not affected.
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1094 void InstructionCodeGeneratorARM64Neon::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1095 LocationSummary* locations = instruction->GetLocations();
1096 VRegister acc = VRegisterFrom(locations->InAt(0));
1097 VRegister left = VRegisterFrom(locations->InAt(1));
1098 VRegister right = VRegisterFrom(locations->InAt(2));
1099
1100 DCHECK(locations->InAt(0).Equals(locations->Out()));
1101
1102 switch (instruction->GetPackedType()) {
1103 case DataType::Type::kUint8:
1104 case DataType::Type::kInt8:
1105 DCHECK_EQ(16u, instruction->GetVectorLength());
1106 if (instruction->GetOpKind() == HInstruction::kAdd) {
1107 __ Mla(acc.V16B(), left.V16B(), right.V16B());
1108 } else {
1109 __ Mls(acc.V16B(), left.V16B(), right.V16B());
1110 }
1111 break;
1112 case DataType::Type::kUint16:
1113 case DataType::Type::kInt16:
1114 DCHECK_EQ(8u, instruction->GetVectorLength());
1115 if (instruction->GetOpKind() == HInstruction::kAdd) {
1116 __ Mla(acc.V8H(), left.V8H(), right.V8H());
1117 } else {
1118 __ Mls(acc.V8H(), left.V8H(), right.V8H());
1119 }
1120 break;
1121 case DataType::Type::kInt32:
1122 DCHECK_EQ(4u, instruction->GetVectorLength());
1123 if (instruction->GetOpKind() == HInstruction::kAdd) {
1124 __ Mla(acc.V4S(), left.V4S(), right.V4S());
1125 } else {
1126 __ Mls(acc.V4S(), left.V4S(), right.V4S());
1127 }
1128 break;
1129 default:
1130 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1131 UNREACHABLE();
1132 }
1133 }
1134
void LocationsBuilderARM64Neon::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
  // Some conversions require temporary registers.
  // The temp count must match what the code generator visitor consumes: it
  // depends on how far the input lanes must be widened to reach the
  // accumulator's element size.
  LocationSummary* locations = instruction->GetLocations();
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  switch (a->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt64:
          // 8-bit -> 64-bit needs four temps (two extra beyond the Int32 case,
          // added here and then falling through).
          locations->AddTemp(Location::RequiresFpuRegister());
          locations->AddTemp(Location::RequiresFpuRegister());
          FALLTHROUGH_INTENDED;
        case DataType::Type::kInt32:
          // 8-bit -> 32-bit needs two temps for the sign-extended halves.
          locations->AddTemp(Location::RequiresFpuRegister());
          locations->AddTemp(Location::RequiresFpuRegister());
          break;
        default:
          break;
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      // 16-bit -> 64-bit needs two temps for the sign-extended halves.
      if (instruction->GetPackedType() == DataType::Type::kInt64) {
        locations->AddTemp(Location::RequiresFpuRegister());
        locations->AddTemp(Location::RequiresFpuRegister());
      }
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      // Same-width SAD is computed manually (sub/abs/add) and needs one temp.
      if (instruction->GetPackedType() == a->GetPackedType()) {
        locations->AddTemp(Location::RequiresFpuRegister());
      }
      break;
    default:
      break;
  }
}
1176
// Generates acc_T += sad(a_S, b_S): a sum-of-absolute-differences accumulation,
// widening the inputs via SXTL/SXTL2 as needed so that SABAL/SABAL2 can
// accumulate into the (wider) accumulator lanes.
void InstructionCodeGeneratorARM64Neon::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister acc = VRegisterFrom(locations->InAt(0));
  VRegister left = VRegisterFrom(locations->InAt(1));
  VRegister right = VRegisterFrom(locations->InAt(2));

  DCHECK(locations->InAt(0).Equals(locations->Out()));

  // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S).
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  switch (a->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt16:
          DCHECK_EQ(8u, instruction->GetVectorLength());
          // One widening step: SABAL/SABAL2 widen 8-bit lanes into the
          // 16-bit accumulator directly (low half, then high half).
          __ Sabal(acc.V8H(), left.V8B(), right.V8B());
          __ Sabal2(acc.V8H(), left.V16B(), right.V16B());
          break;
        case DataType::Type::kInt32: {
          DCHECK_EQ(4u, instruction->GetVectorLength());
          VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
          VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
          // Two widening steps: first sign-extend 8-bit lanes to 16 bits,
          // then let SABAL/SABAL2 widen once more into the 32-bit accumulator.
          __ Sxtl(tmp1.V8H(), left.V8B());
          __ Sxtl(tmp2.V8H(), right.V8B());
          __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
          __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
          // Repeat for the upper 8 input lanes.
          __ Sxtl2(tmp1.V8H(), left.V16B());
          __ Sxtl2(tmp2.V8H(), right.V16B());
          __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
          __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
          break;
        }
        case DataType::Type::kInt64: {
          DCHECK_EQ(2u, instruction->GetVectorLength());
          VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
          VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
          VRegister tmp3 = VRegisterFrom(locations->GetTemp(2));
          VRegister tmp4 = VRegisterFrom(locations->GetTemp(3));
          // Three widening steps: 8 -> 16 (tmp1/tmp2), 16 -> 32 (tmp3/tmp4),
          // then SABAL/SABAL2 into the 64-bit accumulator; repeated for each
          // quarter of the 16 input lanes.
          __ Sxtl(tmp1.V8H(), left.V8B());
          __ Sxtl(tmp2.V8H(), right.V8B());
          __ Sxtl(tmp3.V4S(), tmp1.V4H());
          __ Sxtl(tmp4.V4S(), tmp2.V4H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          __ Sxtl2(tmp3.V4S(), tmp1.V8H());
          __ Sxtl2(tmp4.V4S(), tmp2.V8H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          // Upper 8 input lanes.
          __ Sxtl2(tmp1.V8H(), left.V16B());
          __ Sxtl2(tmp2.V8H(), right.V16B());
          __ Sxtl(tmp3.V4S(), tmp1.V4H());
          __ Sxtl(tmp4.V4S(), tmp2.V4H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          __ Sxtl2(tmp3.V4S(), tmp1.V8H());
          __ Sxtl2(tmp4.V4S(), tmp2.V8H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          break;
        }
        default:
          LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
          UNREACHABLE();
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt32:
          DCHECK_EQ(4u, instruction->GetVectorLength());
          // One widening step: SABAL/SABAL2 widen 16-bit lanes directly.
          __ Sabal(acc.V4S(), left.V4H(), right.V4H());
          __ Sabal2(acc.V4S(), left.V8H(), right.V8H());
          break;
        case DataType::Type::kInt64: {
          DCHECK_EQ(2u, instruction->GetVectorLength());
          VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
          VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
          // Two widening steps: 16 -> 32 via SXTL/SXTL2, then SABAL/SABAL2
          // into the 64-bit accumulator.
          __ Sxtl(tmp1.V4S(), left.V4H());
          __ Sxtl(tmp2.V4S(), right.V4H());
          __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
          __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
          __ Sxtl2(tmp1.V4S(), left.V8H());
          __ Sxtl2(tmp2.V4S(), right.V8H());
          __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
          __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
          break;
        }
        default:
          LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
          UNREACHABLE();
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt32: {
          DCHECK_EQ(4u, instruction->GetVectorLength());
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          // Same width: compute |left - right| manually and add it in.
          __ Sub(tmp.V4S(), left.V4S(), right.V4S());
          __ Abs(tmp.V4S(), tmp.V4S());
          __ Add(acc.V4S(), acc.V4S(), tmp.V4S());
          break;
        }
        case DataType::Type::kInt64:
          DCHECK_EQ(2u, instruction->GetVectorLength());
          // One widening step: SABAL/SABAL2 widen 32-bit lanes directly.
          __ Sabal(acc.V2D(), left.V2S(), right.V2S());
          __ Sabal2(acc.V2D(), left.V4S(), right.V4S());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
          UNREACHABLE();
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt64: {
          DCHECK_EQ(2u, instruction->GetVectorLength());
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          // Same width: compute |left - right| manually and add it in.
          __ Sub(tmp.V2D(), left.V2D(), right.V2D());
          __ Abs(tmp.V2D(), tmp.V2D());
          __ Add(acc.V2D(), acc.V2D(), tmp.V2D());
          break;
        }
        default:
          LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
  }
}
1316
VisitVecDotProd(HVecDotProd * instruction)1317 void LocationsBuilderARM64Neon::VisitVecDotProd(HVecDotProd* instruction) {
1318 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
1319 DCHECK(instruction->GetPackedType() == DataType::Type::kInt32);
1320 locations->SetInAt(0, Location::RequiresFpuRegister());
1321 locations->SetInAt(1, Location::RequiresFpuRegister());
1322 locations->SetInAt(2, Location::RequiresFpuRegister());
1323 locations->SetOut(Location::SameAsFirstInput());
1324
1325 // For Int8 and Uint8 general case we need a temp register.
1326 if ((DataType::Size(instruction->InputAt(1)->AsVecOperation()->GetPackedType()) == 1) &&
1327 !ShouldEmitDotProductInstructions(codegen_)) {
1328 locations->AddTemp(Location::RequiresFpuRegister());
1329 }
1330 }
1331
// Generates acc += dotprod(left, right) into a 4 x Int32 accumulator. Uses the
// dedicated UDOT/SDOT instructions when available; otherwise emulates them
// with widening multiplies and widening adds.
void InstructionCodeGeneratorARM64Neon::VisitVecDotProd(HVecDotProd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  VRegister acc = VRegisterFrom(locations->InAt(0));
  VRegister left = VRegisterFrom(locations->InAt(1));
  VRegister right = VRegisterFrom(locations->InAt(2));
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32);
  DCHECK_EQ(4u, instruction->GetVectorLength());

  size_t inputs_data_size = DataType::Size(a->GetPackedType());
  switch (inputs_data_size) {
    case 1u: {
      // 8-bit inputs. Zero- vs sign-extension picks the U* vs S* instructions.
      DCHECK_EQ(16u, a->GetVectorLength());
      if (instruction->IsZeroExtending()) {
        if (ShouldEmitDotProductInstructions(codegen_)) {
          __ Udot(acc.V4S(), left.V16B(), right.V16B());
        } else {
          // Emulation: widening multiply the low halves to 16 bits, then
          // widen-add both halves of the product into the accumulator;
          // repeat for the high input halves.
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          __ Umull(tmp.V8H(), left.V8B(), right.V8B());
          __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());

          __ Umull2(tmp.V8H(), left.V16B(), right.V16B());
          __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
        }
      } else {
        if (ShouldEmitDotProductInstructions(codegen_)) {
          __ Sdot(acc.V4S(), left.V16B(), right.V16B());
        } else {
          // Signed variant of the emulation above.
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          __ Smull(tmp.V8H(), left.V8B(), right.V8B());
          __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());

          __ Smull2(tmp.V8H(), left.V16B(), right.V16B());
          __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
        }
      }
      break;
    }
    case 2u:
      // 16-bit inputs: widening multiply-accumulate handles this directly.
      DCHECK_EQ(8u, a->GetVectorLength());
      if (instruction->IsZeroExtending()) {
        __ Umlal(acc.V4S(), left.V4H(), right.V4H());
        __ Umlal2(acc.V4S(), left.V8H(), right.V8H());
      } else {
        __ Smlal(acc.V4S(), left.V4H(), right.V4H());
        __ Smlal2(acc.V4S(), left.V8H(), right.V8H());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size;
  }
}
1392
1393 // Helper to set up locations for vector memory operations.
CreateVecMemLocations(ArenaAllocator * allocator,HVecMemoryOperation * instruction,bool is_load)1394 static void CreateVecMemLocations(ArenaAllocator* allocator,
1395 HVecMemoryOperation* instruction,
1396 bool is_load) {
1397 LocationSummary* locations = new (allocator) LocationSummary(instruction);
1398 switch (instruction->GetPackedType()) {
1399 case DataType::Type::kBool:
1400 case DataType::Type::kUint8:
1401 case DataType::Type::kInt8:
1402 case DataType::Type::kUint16:
1403 case DataType::Type::kInt16:
1404 case DataType::Type::kInt32:
1405 case DataType::Type::kInt64:
1406 case DataType::Type::kFloat32:
1407 case DataType::Type::kFloat64:
1408 locations->SetInAt(0, Location::RequiresRegister());
1409 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
1410 if (is_load) {
1411 locations->SetOut(Location::RequiresFpuRegister());
1412 } else {
1413 locations->SetInAt(2, Location::RequiresFpuRegister());
1414 }
1415 break;
1416 default:
1417 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1418 UNREACHABLE();
1419 }
1420 }
1421
VisitVecLoad(HVecLoad * instruction)1422 void LocationsBuilderARM64Neon::VisitVecLoad(HVecLoad* instruction) {
1423 CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
1424 }
1425
// Loads a 128-bit vector from memory. Char loads backed by String.charAt()
// get special handling for compressed strings.
void InstructionCodeGeneratorARM64Neon::VisitVecLoad(HVecLoad* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  VRegister reg = VRegisterFrom(locations->Out());
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;  // may be acquired by VecNEONAddress for address computation

  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      // Special handling of compressed/uncompressed string load.
      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
        vixl::aarch64::Label uncompressed_load, done;
        // Test compression bit.
        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                      "Expecting 0=compressed, 1=uncompressed");
        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
        Register length = temps.AcquireW();
        __ Ldr(length, HeapOperand(InputRegisterAt(instruction, 0), count_offset));
        __ Tbnz(length.W(), 0, &uncompressed_load);
        temps.Release(length);  // no longer needed
        // Zero extend 8 compressed bytes into 8 chars.
        // Note the element size of 1 passed to the address computation.
        __ Ldr(DRegisterFrom(locations->Out()).V8B(),
               VecNEONAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
        __ Uxtl(reg.V8H(), reg.V8B());
        __ B(&done);
        if (scratch.IsValid()) {
          temps.Release(scratch);  // if used, no longer needed
        }
        // Load 8 direct uncompressed chars.
        __ Bind(&uncompressed_load);
        __ Ldr(reg,
               VecNEONAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
        __ Bind(&done);
        return;
      }
      FALLTHROUGH_INTENDED;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      // Plain 128-bit load for every other packed type.
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ Ldr(reg,
             VecNEONAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
1481
VisitVecStore(HVecStore * instruction)1482 void LocationsBuilderARM64Neon::VisitVecStore(HVecStore* instruction) {
1483 CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
1484 }
1485
VisitVecStore(HVecStore * instruction)1486 void InstructionCodeGeneratorARM64Neon::VisitVecStore(HVecStore* instruction) {
1487 LocationSummary* locations = instruction->GetLocations();
1488 size_t size = DataType::Size(instruction->GetPackedType());
1489 VRegister reg = VRegisterFrom(locations->InAt(2));
1490 UseScratchRegisterScope temps(GetVIXLAssembler());
1491 Register scratch;
1492
1493 switch (instruction->GetPackedType()) {
1494 case DataType::Type::kBool:
1495 case DataType::Type::kUint8:
1496 case DataType::Type::kInt8:
1497 case DataType::Type::kUint16:
1498 case DataType::Type::kInt16:
1499 case DataType::Type::kInt32:
1500 case DataType::Type::kFloat32:
1501 case DataType::Type::kInt64:
1502 case DataType::Type::kFloat64:
1503 DCHECK_LE(2u, instruction->GetVectorLength());
1504 DCHECK_LE(instruction->GetVectorLength(), 16u);
1505 __ Str(reg,
1506 VecNEONAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
1507 break;
1508 default:
1509 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1510 UNREACHABLE();
1511 }
1512 }
1513
void LocationsBuilderARM64Neon::VisitVecPredSetAll(HVecPredSetAll* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  DCHECK(instruction->InputAt(0)->IsIntConstant());
  // The code generator emits nothing for this node (see the matching visitor
  // below), so neither the input nor the output needs a location.
  locations->SetInAt(0, Location::NoLocation());
  locations->SetOut(Location::NoLocation());
}
1520
// Intentionally generates no code: the all-lanes governing predicate needs no
// materialization in this backend.
void InstructionCodeGeneratorARM64Neon::VisitVecPredSetAll(HVecPredSetAll*) {
}
1523
// Not supported by the NEON code generator; aborts if ever reached.
void LocationsBuilderARM64Neon::VisitVecPredWhile(HVecPredWhile* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1528
// Not supported by the NEON code generator; aborts if ever reached.
void InstructionCodeGeneratorARM64Neon::VisitVecPredWhile(HVecPredWhile* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1533
// Not supported by the NEON code generator; aborts if ever reached.
void LocationsBuilderARM64Neon::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1538
// Not supported by the NEON code generator; aborts if ever reached.
void InstructionCodeGeneratorARM64Neon::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1543
// Not supported by the NEON code generator; aborts if ever reached.
void LocationsBuilderARM64Neon::VisitVecCondition(HVecCondition* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1548
// Not supported by the NEON code generator; aborts if ever reached.
void InstructionCodeGeneratorARM64Neon::VisitVecCondition(HVecCondition* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1553
// Not supported by the NEON code generator; aborts if ever reached.
void LocationsBuilderARM64Neon::VisitVecPredNot(HVecPredNot* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1558
// Not supported by the NEON code generator; aborts if ever reached.
void InstructionCodeGeneratorARM64Neon::VisitVecPredNot(HVecPredNot* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1563
// Acquires a Q-sized (128-bit) scratch vector register from the VIXL scratch
// pool and wraps it as a Location. NEON SIMD registers are always Q-width here.
Location InstructionCodeGeneratorARM64Neon::AllocateSIMDScratchLocation(
    vixl::aarch64::UseScratchRegisterScope* scope) {
  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
  return LocationFrom(scope->AcquireVRegisterOfSize(kQRegSize));
}
1569
// Returns a scratch SIMD register previously obtained via
// AllocateSIMDScratchLocation back to the VIXL scratch pool.
void InstructionCodeGeneratorARM64Neon::FreeSIMDScratchLocation(
    Location loc, vixl::aarch64::UseScratchRegisterScope* scope) {
  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
  scope->Release(QRegisterFrom(loc));
}
1575
// Loads a full 128-bit SIMD register from a stack slot.
void InstructionCodeGeneratorARM64Neon::LoadSIMDRegFromStack(Location destination,
                                                             Location source) {
  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
  __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
}
1581
MoveSIMDRegToSIMDReg(Location destination,Location source)1582 void InstructionCodeGeneratorARM64Neon::MoveSIMDRegToSIMDReg(Location destination,
1583 Location source) {
1584 DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
1585 __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
1586 }
1587
MoveToSIMDStackSlot(Location destination,Location source)1588 void InstructionCodeGeneratorARM64Neon::MoveToSIMDStackSlot(Location destination,
1589 Location source) {
1590 DCHECK(destination.IsSIMDStackSlot());
1591 DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
1592
1593 if (source.IsFpuRegister()) {
1594 __ Str(QRegisterFrom(source), StackOperandFrom(destination));
1595 } else {
1596 DCHECK(source.IsSIMDStackSlot());
1597 UseScratchRegisterScope temps(GetVIXLAssembler());
1598 if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
1599 Register temp = temps.AcquireX();
1600 __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
1601 __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
1602 __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
1603 __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
1604 } else {
1605 VRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
1606 __ Ldr(temp, StackOperandFrom(source));
1607 __ Str(temp, StackOperandFrom(destination));
1608 }
1609 }
1610 }
1611
1612 // Calculate memory accessing operand for save/restore live registers.
1613 template <bool is_save>
SaveRestoreLiveRegistersHelperNeonImpl(CodeGeneratorARM64 * codegen,LocationSummary * locations,int64_t spill_offset)1614 void SaveRestoreLiveRegistersHelperNeonImpl(CodeGeneratorARM64* codegen,
1615 LocationSummary* locations,
1616 int64_t spill_offset) {
1617 const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
1618 const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
1619 DCHECK(helpers::ArtVixlRegCodeCoherentForRegSet(core_spills,
1620 codegen->GetNumberOfCoreRegisters(),
1621 fp_spills,
1622 codegen->GetNumberOfFloatingPointRegisters()));
1623
1624 CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
1625 const unsigned v_reg_size_in_bits = codegen->GetSlowPathFPWidth() * 8;
1626 DCHECK_LE(codegen->GetSIMDRegisterWidth(), kQRegSizeInBytes);
1627 CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size_in_bits, fp_spills);
1628
1629 MacroAssembler* masm = codegen->GetVIXLAssembler();
1630 UseScratchRegisterScope temps(masm);
1631
1632 Register base = masm->StackPointer();
1633 int64_t core_spill_size = core_list.GetTotalSizeInBytes();
1634 int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
1635 int64_t reg_size = kXRegSizeInBytes;
1636 int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
1637 uint32_t ls_access_size = WhichPowerOf2(reg_size);
1638 if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
1639 !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
1640 // If the offset does not fit in the instruction's immediate field, use an alternate register
1641 // to compute the base address(float point registers spill base address).
1642 Register new_base = temps.AcquireSameSizeAs(base);
1643 masm->Add(new_base, base, Operand(spill_offset + core_spill_size));
1644 base = new_base;
1645 spill_offset = -core_spill_size;
1646 int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
1647 DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
1648 DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
1649 }
1650
1651 if (is_save) {
1652 masm->StoreCPURegList(core_list, MemOperand(base, spill_offset));
1653 masm->StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
1654 } else {
1655 masm->LoadCPURegList(core_list, MemOperand(base, spill_offset));
1656 masm->LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
1657 }
1658 }
1659
SaveLiveRegistersHelper(LocationSummary * locations,int64_t spill_offset)1660 void InstructionCodeGeneratorARM64Neon::SaveLiveRegistersHelper(LocationSummary* locations,
1661 int64_t spill_offset) {
1662 SaveRestoreLiveRegistersHelperNeonImpl</* is_save= */ true>(codegen_, locations, spill_offset);
1663 }
1664
RestoreLiveRegistersHelper(LocationSummary * locations,int64_t spill_offset)1665 void InstructionCodeGeneratorARM64Neon::RestoreLiveRegistersHelper(LocationSummary* locations,
1666 int64_t spill_offset) {
1667 SaveRestoreLiveRegistersHelperNeonImpl</* is_save= */ false>(codegen_, locations, spill_offset);
1668 }
1669
1670 #undef __
1671
1672 } // namespace arm64
1673 } // namespace art
1674