1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_arm64.h"
18
19 #include "arch/arm64/instruction_set_features_arm64.h"
20 #include "mirror/array-inl.h"
21 #include "mirror/string.h"
22
23 using namespace vixl::aarch64; // NOLINT(build/namespaces)
24
25 namespace art {
26 namespace arm64 {
27
28 using helpers::ARM64EncodableConstantOrRegister;
29 using helpers::Arm64CanEncodeConstantAsImmediate;
30 using helpers::DRegisterFrom;
31 using helpers::HeapOperand;
32 using helpers::InputRegisterAt;
33 using helpers::Int64FromLocation;
34 using helpers::OutputRegister;
35 using helpers::VRegisterFrom;
36 using helpers::WRegisterFrom;
37 using helpers::XRegisterFrom;
38
39 #define __ GetVIXLAssembler()->
40
41 // Build-time switch for Armv8.4-a dot product instructions.
42 // TODO: Enable dot product when there is a device to test it on.
43 static constexpr bool kArm64EmitDotProdInstructions = false;
44
45 // Returns whether dot product instructions should be emitted.
ShouldEmitDotProductInstructions(const CodeGeneratorARM64 * codegen_)46 static bool ShouldEmitDotProductInstructions(const CodeGeneratorARM64* codegen_) {
47 return kArm64EmitDotProdInstructions && codegen_->GetInstructionSetFeatures().HasDotProd();
48 }
49
VisitVecReplicateScalar(HVecReplicateScalar * instruction)50 void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
51 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
52 HInstruction* input = instruction->InputAt(0);
53 switch (instruction->GetPackedType()) {
54 case DataType::Type::kBool:
55 case DataType::Type::kUint8:
56 case DataType::Type::kInt8:
57 case DataType::Type::kUint16:
58 case DataType::Type::kInt16:
59 case DataType::Type::kInt32:
60 case DataType::Type::kInt64:
61 locations->SetInAt(0, ARM64EncodableConstantOrRegister(input, instruction));
62 locations->SetOut(Location::RequiresFpuRegister());
63 break;
64 case DataType::Type::kFloat32:
65 case DataType::Type::kFloat64:
66 if (input->IsConstant() &&
67 Arm64CanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
68 locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
69 locations->SetOut(Location::RequiresFpuRegister());
70 } else {
71 locations->SetInAt(0, Location::RequiresFpuRegister());
72 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
73 }
74 break;
75 default:
76 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
77 UNREACHABLE();
78 }
79 }
80
// Emits code that broadcasts a scalar (a constant or a register value) into
// every lane of the destination SIMD register, using an arrangement that
// matches the packed type.
void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  Location src_loc = locations->InAt(0);
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V16B(), Int64FromLocation(src_loc));  // immediate broadcast
      } else {
        __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));  // register broadcast
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V8H(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V4S(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V2D(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.V2D(), XRegisterFrom(src_loc));  // 64-bit lanes need an X register
      }
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Fmov(dst.V4S(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
      } else {
        // Non-constant FP scalar arrives in an FP register: duplicate lane 0.
        __ Dup(dst.V4S(), VRegisterFrom(src_loc).V4S(), 0);
      }
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Fmov(dst.V2D(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
      } else {
        __ Dup(dst.V2D(), VRegisterFrom(src_loc).V2D(), 0);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
142
VisitVecExtractScalar(HVecExtractScalar * instruction)143 void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
144 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
145 switch (instruction->GetPackedType()) {
146 case DataType::Type::kBool:
147 case DataType::Type::kUint8:
148 case DataType::Type::kInt8:
149 case DataType::Type::kUint16:
150 case DataType::Type::kInt16:
151 case DataType::Type::kInt32:
152 case DataType::Type::kInt64:
153 locations->SetInAt(0, Location::RequiresFpuRegister());
154 locations->SetOut(Location::RequiresRegister());
155 break;
156 case DataType::Type::kFloat32:
157 case DataType::Type::kFloat64:
158 locations->SetInAt(0, Location::RequiresFpuRegister());
159 locations->SetOut(Location::SameAsFirstInput());
160 break;
161 default:
162 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
163 UNREACHABLE();
164 }
165 }
166
// Emits code extracting lane 0 of the input vector into the output location.
void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umov(OutputRegister(instruction), src.V4S(), 0);  // lane 0 -> core register
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Umov(OutputRegister(instruction), src.V2D(), 0);
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      // FP lane 0 aliases the scalar FP register; the builder pinned the
      // output to the input, so nothing needs to be emitted.
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 4u);
      DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
190
191 // Helper to set up locations for vector unary operations.
CreateVecUnOpLocations(ArenaAllocator * allocator,HVecUnaryOperation * instruction)192 static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
193 LocationSummary* locations = new (allocator) LocationSummary(instruction);
194 switch (instruction->GetPackedType()) {
195 case DataType::Type::kBool:
196 locations->SetInAt(0, Location::RequiresFpuRegister());
197 locations->SetOut(Location::RequiresFpuRegister(),
198 instruction->IsVecNot() ? Location::kOutputOverlap
199 : Location::kNoOutputOverlap);
200 break;
201 case DataType::Type::kUint8:
202 case DataType::Type::kInt8:
203 case DataType::Type::kUint16:
204 case DataType::Type::kInt16:
205 case DataType::Type::kInt32:
206 case DataType::Type::kInt64:
207 case DataType::Type::kFloat32:
208 case DataType::Type::kFloat64:
209 locations->SetInAt(0, Location::RequiresFpuRegister());
210 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
211 break;
212 default:
213 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
214 UNREACHABLE();
215 }
216 }
217
void LocationsBuilderARM64::VisitVecReduce(HVecReduce* instruction) {
  // Reductions read one SIMD input; the generic unary-op locations apply.
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}
221
// Emits an across-vector reduction (sum/min/max); the scalar result is left
// in the low element of the destination register.
void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          __ Addv(dst.S(), src.V4S());  // across-vector add
          break;
        case HVecReduce::kMin:
          __ Sminv(dst.S(), src.V4S());  // across-vector signed min
          break;
        case HVecReduce::kMax:
          __ Smaxv(dst.S(), src.V4S());  // across-vector signed max
          break;
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          __ Addp(dst.D(), src.V2D());  // pairwise add of the two lanes
          break;
        default:
          // There is no across-vector min/max for 64-bit lanes here.
          LOG(FATAL) << "Unsupported SIMD min/max";
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
257
void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) {
  // Lane-wise conversion is a unary op; reuse the generic locations.
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}
261
// Emits a lane-wise type conversion. Only int32 -> float32 is supported.
void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  DataType::Type from = instruction->GetInputType();
  DataType::Type to = instruction->GetResultType();
  if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
    DCHECK_EQ(4u, instruction->GetVectorLength());
    __ Scvtf(dst.V4S(), src.V4S());  // signed-int to float, per lane
  } else {
    LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
  }
}
275
void LocationsBuilderARM64::VisitVecNeg(HVecNeg* instruction) {
  // Negation is a unary op; reuse the generic locations.
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}
279
VisitVecNeg(HVecNeg * instruction)280 void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) {
281 LocationSummary* locations = instruction->GetLocations();
282 VRegister src = VRegisterFrom(locations->InAt(0));
283 VRegister dst = VRegisterFrom(locations->Out());
284 switch (instruction->GetPackedType()) {
285 case DataType::Type::kUint8:
286 case DataType::Type::kInt8:
287 DCHECK_EQ(16u, instruction->GetVectorLength());
288 __ Neg(dst.V16B(), src.V16B());
289 break;
290 case DataType::Type::kUint16:
291 case DataType::Type::kInt16:
292 DCHECK_EQ(8u, instruction->GetVectorLength());
293 __ Neg(dst.V8H(), src.V8H());
294 break;
295 case DataType::Type::kInt32:
296 DCHECK_EQ(4u, instruction->GetVectorLength());
297 __ Neg(dst.V4S(), src.V4S());
298 break;
299 case DataType::Type::kInt64:
300 DCHECK_EQ(2u, instruction->GetVectorLength());
301 __ Neg(dst.V2D(), src.V2D());
302 break;
303 case DataType::Type::kFloat32:
304 DCHECK_EQ(4u, instruction->GetVectorLength());
305 __ Fneg(dst.V4S(), src.V4S());
306 break;
307 case DataType::Type::kFloat64:
308 DCHECK_EQ(2u, instruction->GetVectorLength());
309 __ Fneg(dst.V2D(), src.V2D());
310 break;
311 default:
312 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
313 UNREACHABLE();
314 }
315 }
316
void LocationsBuilderARM64::VisitVecAbs(HVecAbs* instruction) {
  // Absolute value is a unary op; reuse the generic locations.
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}
320
VisitVecAbs(HVecAbs * instruction)321 void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) {
322 LocationSummary* locations = instruction->GetLocations();
323 VRegister src = VRegisterFrom(locations->InAt(0));
324 VRegister dst = VRegisterFrom(locations->Out());
325 switch (instruction->GetPackedType()) {
326 case DataType::Type::kInt8:
327 DCHECK_EQ(16u, instruction->GetVectorLength());
328 __ Abs(dst.V16B(), src.V16B());
329 break;
330 case DataType::Type::kInt16:
331 DCHECK_EQ(8u, instruction->GetVectorLength());
332 __ Abs(dst.V8H(), src.V8H());
333 break;
334 case DataType::Type::kInt32:
335 DCHECK_EQ(4u, instruction->GetVectorLength());
336 __ Abs(dst.V4S(), src.V4S());
337 break;
338 case DataType::Type::kInt64:
339 DCHECK_EQ(2u, instruction->GetVectorLength());
340 __ Abs(dst.V2D(), src.V2D());
341 break;
342 case DataType::Type::kFloat32:
343 DCHECK_EQ(4u, instruction->GetVectorLength());
344 __ Fabs(dst.V4S(), src.V4S());
345 break;
346 case DataType::Type::kFloat64:
347 DCHECK_EQ(2u, instruction->GetVectorLength());
348 __ Fabs(dst.V2D(), src.V2D());
349 break;
350 default:
351 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
352 UNREACHABLE();
353 }
354 }
355
void LocationsBuilderARM64::VisitVecNot(HVecNot* instruction) {
  // Bitwise/boolean not is a unary op; the helper requests output overlap
  // for the boolean case.
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}
359
VisitVecNot(HVecNot * instruction)360 void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) {
361 LocationSummary* locations = instruction->GetLocations();
362 VRegister src = VRegisterFrom(locations->InAt(0));
363 VRegister dst = VRegisterFrom(locations->Out());
364 switch (instruction->GetPackedType()) {
365 case DataType::Type::kBool: // special case boolean-not
366 DCHECK_EQ(16u, instruction->GetVectorLength());
367 __ Movi(dst.V16B(), 1);
368 __ Eor(dst.V16B(), dst.V16B(), src.V16B());
369 break;
370 case DataType::Type::kUint8:
371 case DataType::Type::kInt8:
372 case DataType::Type::kUint16:
373 case DataType::Type::kInt16:
374 case DataType::Type::kInt32:
375 case DataType::Type::kInt64:
376 __ Not(dst.V16B(), src.V16B()); // lanes do not matter
377 break;
378 default:
379 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
380 UNREACHABLE();
381 }
382 }
383
384 // Helper to set up locations for vector binary operations.
CreateVecBinOpLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)385 static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
386 LocationSummary* locations = new (allocator) LocationSummary(instruction);
387 switch (instruction->GetPackedType()) {
388 case DataType::Type::kBool:
389 case DataType::Type::kUint8:
390 case DataType::Type::kInt8:
391 case DataType::Type::kUint16:
392 case DataType::Type::kInt16:
393 case DataType::Type::kInt32:
394 case DataType::Type::kInt64:
395 case DataType::Type::kFloat32:
396 case DataType::Type::kFloat64:
397 locations->SetInAt(0, Location::RequiresFpuRegister());
398 locations->SetInAt(1, Location::RequiresFpuRegister());
399 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
400 break;
401 default:
402 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
403 UNREACHABLE();
404 }
405 }
406
void LocationsBuilderARM64::VisitVecAdd(HVecAdd* instruction) {
  // Plain vector add uses the generic binary-op locations.
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
410
// Emits a lane-wise addition: integer Add or floating-point Fadd, with the
// arrangement chosen by the packed type.
void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Add(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Add(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Add(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Add(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fadd(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
448
void LocationsBuilderARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  // Saturating add uses the generic binary-op locations.
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
452
// Emits a lane-wise saturating addition: Uqadd for unsigned lanes, Sqadd
// for signed lanes. Only 8- and 16-bit packed types are supported.
void InstructionCodeGeneratorARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Uqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Uqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
480
void LocationsBuilderARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  // Halving add uses the generic binary-op locations.
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
484
VisitVecHalvingAdd(HVecHalvingAdd * instruction)485 void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
486 LocationSummary* locations = instruction->GetLocations();
487 VRegister lhs = VRegisterFrom(locations->InAt(0));
488 VRegister rhs = VRegisterFrom(locations->InAt(1));
489 VRegister dst = VRegisterFrom(locations->Out());
490 switch (instruction->GetPackedType()) {
491 case DataType::Type::kUint8:
492 DCHECK_EQ(16u, instruction->GetVectorLength());
493 instruction->IsRounded()
494 ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
495 : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B());
496 break;
497 case DataType::Type::kInt8:
498 DCHECK_EQ(16u, instruction->GetVectorLength());
499 instruction->IsRounded()
500 ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
501 : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B());
502 break;
503 case DataType::Type::kUint16:
504 DCHECK_EQ(8u, instruction->GetVectorLength());
505 instruction->IsRounded()
506 ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
507 : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H());
508 break;
509 case DataType::Type::kInt16:
510 DCHECK_EQ(8u, instruction->GetVectorLength());
511 instruction->IsRounded()
512 ? __ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
513 : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
514 break;
515 default:
516 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
517 UNREACHABLE();
518 }
519 }
520
void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) {
  // Plain vector subtract uses the generic binary-op locations.
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
524
VisitVecSub(HVecSub * instruction)525 void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) {
526 LocationSummary* locations = instruction->GetLocations();
527 VRegister lhs = VRegisterFrom(locations->InAt(0));
528 VRegister rhs = VRegisterFrom(locations->InAt(1));
529 VRegister dst = VRegisterFrom(locations->Out());
530 switch (instruction->GetPackedType()) {
531 case DataType::Type::kUint8:
532 case DataType::Type::kInt8:
533 DCHECK_EQ(16u, instruction->GetVectorLength());
534 __ Sub(dst.V16B(), lhs.V16B(), rhs.V16B());
535 break;
536 case DataType::Type::kUint16:
537 case DataType::Type::kInt16:
538 DCHECK_EQ(8u, instruction->GetVectorLength());
539 __ Sub(dst.V8H(), lhs.V8H(), rhs.V8H());
540 break;
541 case DataType::Type::kInt32:
542 DCHECK_EQ(4u, instruction->GetVectorLength());
543 __ Sub(dst.V4S(), lhs.V4S(), rhs.V4S());
544 break;
545 case DataType::Type::kInt64:
546 DCHECK_EQ(2u, instruction->GetVectorLength());
547 __ Sub(dst.V2D(), lhs.V2D(), rhs.V2D());
548 break;
549 case DataType::Type::kFloat32:
550 DCHECK_EQ(4u, instruction->GetVectorLength());
551 __ Fsub(dst.V4S(), lhs.V4S(), rhs.V4S());
552 break;
553 case DataType::Type::kFloat64:
554 DCHECK_EQ(2u, instruction->GetVectorLength());
555 __ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D());
556 break;
557 default:
558 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
559 UNREACHABLE();
560 }
561 }
562
void LocationsBuilderARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  // Saturating subtract uses the generic binary-op locations.
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
566
// Emits a lane-wise saturating subtraction: Uqsub for unsigned lanes, Sqsub
// for signed lanes. Only 8- and 16-bit packed types are supported.
void InstructionCodeGeneratorARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Uqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Uqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
594
void LocationsBuilderARM64::VisitVecMul(HVecMul* instruction) {
  // Vector multiply uses the generic binary-op locations.
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
598
// Emits a lane-wise multiplication: integer Mul or floating-point Fmul.
// Note there is no 64-bit integer lane case here.
void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Mul(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Mul(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Mul(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmul(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
632
void LocationsBuilderARM64::VisitVecDiv(HVecDiv* instruction) {
  // Vector divide uses the generic binary-op locations.
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
636
VisitVecDiv(HVecDiv * instruction)637 void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) {
638 LocationSummary* locations = instruction->GetLocations();
639 VRegister lhs = VRegisterFrom(locations->InAt(0));
640 VRegister rhs = VRegisterFrom(locations->InAt(1));
641 VRegister dst = VRegisterFrom(locations->Out());
642 switch (instruction->GetPackedType()) {
643 case DataType::Type::kFloat32:
644 DCHECK_EQ(4u, instruction->GetVectorLength());
645 __ Fdiv(dst.V4S(), lhs.V4S(), rhs.V4S());
646 break;
647 case DataType::Type::kFloat64:
648 DCHECK_EQ(2u, instruction->GetVectorLength());
649 __ Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D());
650 break;
651 default:
652 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
653 UNREACHABLE();
654 }
655 }
656
void LocationsBuilderARM64::VisitVecMin(HVecMin* instruction) {
  // Vector min uses the generic binary-op locations.
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
660
// Emits a lane-wise minimum: Umin/Smin selected by signedness of the packed
// type, Fmin for floating-point lanes.
void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
704
void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) {
  // Vector max uses the generic binary-op locations.
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
708
// Emits a lane-wise maximum: Umax/Smax selected by signedness of the packed
// type, Fmax for floating-point lanes. Mirrors VisitVecMin.
void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
752
void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
  // TODO: Allow constants supported by BIC (vector, immediate).
  // Currently both operands are forced into SIMD registers.
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
757
// Emits a bitwise AND over the full 128-bit register; since AND is a pure
// bit operation, one byte-wise form covers every packed type.
void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ And(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
780
void LocationsBuilderARM64::VisitVecAndNot(HVecAndNot* instruction) {
  // And-not is not implemented on ARM64 yet; reaching here is a compiler bug.
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}
784
void InstructionCodeGeneratorARM64::VisitVecAndNot(HVecAndNot* instruction) {
  // TODO: Use BIC (vector, register).
  // Not implemented; reaching here is a compiler bug.
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}
789
void LocationsBuilderARM64::VisitVecOr(HVecOr* instruction) {
  // Bitwise OR uses the generic binary-op locations.
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
793
// Emits a bitwise OR over the full 128-bit register; one byte-wise form
// covers every packed type.
void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ Orr(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
816
void LocationsBuilderARM64::VisitVecXor(HVecXor* instruction) {
  // Bitwise XOR uses the generic binary-op locations.
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
820
VisitVecXor(HVecXor * instruction)821 void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) {
822 LocationSummary* locations = instruction->GetLocations();
823 VRegister lhs = VRegisterFrom(locations->InAt(0));
824 VRegister rhs = VRegisterFrom(locations->InAt(1));
825 VRegister dst = VRegisterFrom(locations->Out());
826 switch (instruction->GetPackedType()) {
827 case DataType::Type::kBool:
828 case DataType::Type::kUint8:
829 case DataType::Type::kInt8:
830 case DataType::Type::kUint16:
831 case DataType::Type::kInt16:
832 case DataType::Type::kInt32:
833 case DataType::Type::kInt64:
834 case DataType::Type::kFloat32:
835 case DataType::Type::kFloat64:
836 __ Eor(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter
837 break;
838 default:
839 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
840 UNREACHABLE();
841 }
842 }
843
844 // Helper to set up locations for vector shift operations.
CreateVecShiftLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)845 static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
846 LocationSummary* locations = new (allocator) LocationSummary(instruction);
847 switch (instruction->GetPackedType()) {
848 case DataType::Type::kUint8:
849 case DataType::Type::kInt8:
850 case DataType::Type::kUint16:
851 case DataType::Type::kInt16:
852 case DataType::Type::kInt32:
853 case DataType::Type::kInt64:
854 locations->SetInAt(0, Location::RequiresFpuRegister());
855 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
856 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
857 break;
858 default:
859 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
860 UNREACHABLE();
861 }
862 }
863
void LocationsBuilderARM64::VisitVecShl(HVecShl* instruction) {
  // Shift-left uses the shared shift location setup (immediate distance).
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}
867
VisitVecShl(HVecShl * instruction)868 void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) {
869 LocationSummary* locations = instruction->GetLocations();
870 VRegister lhs = VRegisterFrom(locations->InAt(0));
871 VRegister dst = VRegisterFrom(locations->Out());
872 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
873 switch (instruction->GetPackedType()) {
874 case DataType::Type::kUint8:
875 case DataType::Type::kInt8:
876 DCHECK_EQ(16u, instruction->GetVectorLength());
877 __ Shl(dst.V16B(), lhs.V16B(), value);
878 break;
879 case DataType::Type::kUint16:
880 case DataType::Type::kInt16:
881 DCHECK_EQ(8u, instruction->GetVectorLength());
882 __ Shl(dst.V8H(), lhs.V8H(), value);
883 break;
884 case DataType::Type::kInt32:
885 DCHECK_EQ(4u, instruction->GetVectorLength());
886 __ Shl(dst.V4S(), lhs.V4S(), value);
887 break;
888 case DataType::Type::kInt64:
889 DCHECK_EQ(2u, instruction->GetVectorLength());
890 __ Shl(dst.V2D(), lhs.V2D(), value);
891 break;
892 default:
893 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
894 UNREACHABLE();
895 }
896 }
897
void LocationsBuilderARM64::VisitVecShr(HVecShr* instruction) {
  // Arithmetic shift-right uses the shared shift location setup.
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}
901
VisitVecShr(HVecShr * instruction)902 void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) {
903 LocationSummary* locations = instruction->GetLocations();
904 VRegister lhs = VRegisterFrom(locations->InAt(0));
905 VRegister dst = VRegisterFrom(locations->Out());
906 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
907 switch (instruction->GetPackedType()) {
908 case DataType::Type::kUint8:
909 case DataType::Type::kInt8:
910 DCHECK_EQ(16u, instruction->GetVectorLength());
911 __ Sshr(dst.V16B(), lhs.V16B(), value);
912 break;
913 case DataType::Type::kUint16:
914 case DataType::Type::kInt16:
915 DCHECK_EQ(8u, instruction->GetVectorLength());
916 __ Sshr(dst.V8H(), lhs.V8H(), value);
917 break;
918 case DataType::Type::kInt32:
919 DCHECK_EQ(4u, instruction->GetVectorLength());
920 __ Sshr(dst.V4S(), lhs.V4S(), value);
921 break;
922 case DataType::Type::kInt64:
923 DCHECK_EQ(2u, instruction->GetVectorLength());
924 __ Sshr(dst.V2D(), lhs.V2D(), value);
925 break;
926 default:
927 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
928 UNREACHABLE();
929 }
930 }
931
void LocationsBuilderARM64::VisitVecUShr(HVecUShr* instruction) {
  // Logical (unsigned) shift-right uses the shared shift location setup.
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}
935
VisitVecUShr(HVecUShr * instruction)936 void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) {
937 LocationSummary* locations = instruction->GetLocations();
938 VRegister lhs = VRegisterFrom(locations->InAt(0));
939 VRegister dst = VRegisterFrom(locations->Out());
940 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
941 switch (instruction->GetPackedType()) {
942 case DataType::Type::kUint8:
943 case DataType::Type::kInt8:
944 DCHECK_EQ(16u, instruction->GetVectorLength());
945 __ Ushr(dst.V16B(), lhs.V16B(), value);
946 break;
947 case DataType::Type::kUint16:
948 case DataType::Type::kInt16:
949 DCHECK_EQ(8u, instruction->GetVectorLength());
950 __ Ushr(dst.V8H(), lhs.V8H(), value);
951 break;
952 case DataType::Type::kInt32:
953 DCHECK_EQ(4u, instruction->GetVectorLength());
954 __ Ushr(dst.V4S(), lhs.V4S(), value);
955 break;
956 case DataType::Type::kInt64:
957 DCHECK_EQ(2u, instruction->GetVectorLength());
958 __ Ushr(dst.V2D(), lhs.V2D(), value);
959 break;
960 default:
961 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
962 UNREACHABLE();
963 }
964 }
965
VisitVecSetScalars(HVecSetScalars * instruction)966 void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
967 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
968
969 DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
970
971 HInstruction* input = instruction->InputAt(0);
972 bool is_zero = IsZeroBitPattern(input);
973
974 switch (instruction->GetPackedType()) {
975 case DataType::Type::kBool:
976 case DataType::Type::kUint8:
977 case DataType::Type::kInt8:
978 case DataType::Type::kUint16:
979 case DataType::Type::kInt16:
980 case DataType::Type::kInt32:
981 case DataType::Type::kInt64:
982 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
983 : Location::RequiresRegister());
984 locations->SetOut(Location::RequiresFpuRegister());
985 break;
986 case DataType::Type::kFloat32:
987 case DataType::Type::kFloat64:
988 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
989 : Location::RequiresFpuRegister());
990 locations->SetOut(Location::RequiresFpuRegister());
991 break;
992 default:
993 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
994 UNREACHABLE();
995 }
996 }
997
VisitVecSetScalars(HVecSetScalars * instruction)998 void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
999 LocationSummary* locations = instruction->GetLocations();
1000 VRegister dst = VRegisterFrom(locations->Out());
1001
1002 DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
1003
1004 // Zero out all other elements first.
1005 __ Movi(dst.V16B(), 0);
1006
1007 // Shorthand for any type of zero.
1008 if (IsZeroBitPattern(instruction->InputAt(0))) {
1009 return;
1010 }
1011
1012 // Set required elements.
1013 switch (instruction->GetPackedType()) {
1014 case DataType::Type::kBool:
1015 case DataType::Type::kUint8:
1016 case DataType::Type::kInt8:
1017 DCHECK_EQ(16u, instruction->GetVectorLength());
1018 __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
1019 break;
1020 case DataType::Type::kUint16:
1021 case DataType::Type::kInt16:
1022 DCHECK_EQ(8u, instruction->GetVectorLength());
1023 __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
1024 break;
1025 case DataType::Type::kInt32:
1026 DCHECK_EQ(4u, instruction->GetVectorLength());
1027 __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
1028 break;
1029 case DataType::Type::kInt64:
1030 DCHECK_EQ(2u, instruction->GetVectorLength());
1031 __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
1032 break;
1033 default:
1034 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1035 UNREACHABLE();
1036 }
1037 }
1038
1039 // Helper to set up locations for vector accumulations.
CreateVecAccumLocations(ArenaAllocator * allocator,HVecOperation * instruction)1040 static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
1041 LocationSummary* locations = new (allocator) LocationSummary(instruction);
1042 switch (instruction->GetPackedType()) {
1043 case DataType::Type::kUint8:
1044 case DataType::Type::kInt8:
1045 case DataType::Type::kUint16:
1046 case DataType::Type::kInt16:
1047 case DataType::Type::kInt32:
1048 case DataType::Type::kInt64:
1049 locations->SetInAt(0, Location::RequiresFpuRegister());
1050 locations->SetInAt(1, Location::RequiresFpuRegister());
1051 locations->SetInAt(2, Location::RequiresFpuRegister());
1052 locations->SetOut(Location::SameAsFirstInput());
1053 break;
1054 default:
1055 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1056 UNREACHABLE();
1057 }
1058 }
1059
void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  // Multiply-accumulate uses the common accumulator location setup.
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
}
1063
1064 // Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
1065 // 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
1066 // However vector MultiplyAccumulate instruction is not affected.
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1067 void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1068 LocationSummary* locations = instruction->GetLocations();
1069 VRegister acc = VRegisterFrom(locations->InAt(0));
1070 VRegister left = VRegisterFrom(locations->InAt(1));
1071 VRegister right = VRegisterFrom(locations->InAt(2));
1072
1073 DCHECK(locations->InAt(0).Equals(locations->Out()));
1074
1075 switch (instruction->GetPackedType()) {
1076 case DataType::Type::kUint8:
1077 case DataType::Type::kInt8:
1078 DCHECK_EQ(16u, instruction->GetVectorLength());
1079 if (instruction->GetOpKind() == HInstruction::kAdd) {
1080 __ Mla(acc.V16B(), left.V16B(), right.V16B());
1081 } else {
1082 __ Mls(acc.V16B(), left.V16B(), right.V16B());
1083 }
1084 break;
1085 case DataType::Type::kUint16:
1086 case DataType::Type::kInt16:
1087 DCHECK_EQ(8u, instruction->GetVectorLength());
1088 if (instruction->GetOpKind() == HInstruction::kAdd) {
1089 __ Mla(acc.V8H(), left.V8H(), right.V8H());
1090 } else {
1091 __ Mls(acc.V8H(), left.V8H(), right.V8H());
1092 }
1093 break;
1094 case DataType::Type::kInt32:
1095 DCHECK_EQ(4u, instruction->GetVectorLength());
1096 if (instruction->GetOpKind() == HInstruction::kAdd) {
1097 __ Mla(acc.V4S(), left.V4S(), right.V4S());
1098 } else {
1099 __ Mls(acc.V4S(), left.V4S(), right.V4S());
1100 }
1101 break;
1102 default:
1103 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1104 UNREACHABLE();
1105 }
1106 }
1107
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1108 void LocationsBuilderARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1109 CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1110 // Some conversions require temporary registers.
1111 LocationSummary* locations = instruction->GetLocations();
1112 HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
1113 HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
1114 DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
1115 HVecOperation::ToSignedType(b->GetPackedType()));
1116 switch (a->GetPackedType()) {
1117 case DataType::Type::kUint8:
1118 case DataType::Type::kInt8:
1119 switch (instruction->GetPackedType()) {
1120 case DataType::Type::kInt64:
1121 locations->AddTemp(Location::RequiresFpuRegister());
1122 locations->AddTemp(Location::RequiresFpuRegister());
1123 FALLTHROUGH_INTENDED;
1124 case DataType::Type::kInt32:
1125 locations->AddTemp(Location::RequiresFpuRegister());
1126 locations->AddTemp(Location::RequiresFpuRegister());
1127 break;
1128 default:
1129 break;
1130 }
1131 break;
1132 case DataType::Type::kUint16:
1133 case DataType::Type::kInt16:
1134 if (instruction->GetPackedType() == DataType::Type::kInt64) {
1135 locations->AddTemp(Location::RequiresFpuRegister());
1136 locations->AddTemp(Location::RequiresFpuRegister());
1137 }
1138 break;
1139 case DataType::Type::kInt32:
1140 case DataType::Type::kInt64:
1141 if (instruction->GetPackedType() == a->GetPackedType()) {
1142 locations->AddTemp(Location::RequiresFpuRegister());
1143 }
1144 break;
1145 default:
1146 break;
1147 }
1148 }
1149
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1150 void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1151 LocationSummary* locations = instruction->GetLocations();
1152 VRegister acc = VRegisterFrom(locations->InAt(0));
1153 VRegister left = VRegisterFrom(locations->InAt(1));
1154 VRegister right = VRegisterFrom(locations->InAt(2));
1155
1156 DCHECK(locations->InAt(0).Equals(locations->Out()));
1157
1158 // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S).
1159 HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
1160 HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
1161 DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
1162 HVecOperation::ToSignedType(b->GetPackedType()));
1163 switch (a->GetPackedType()) {
1164 case DataType::Type::kUint8:
1165 case DataType::Type::kInt8:
1166 DCHECK_EQ(16u, a->GetVectorLength());
1167 switch (instruction->GetPackedType()) {
1168 case DataType::Type::kInt16:
1169 DCHECK_EQ(8u, instruction->GetVectorLength());
1170 __ Sabal(acc.V8H(), left.V8B(), right.V8B());
1171 __ Sabal2(acc.V8H(), left.V16B(), right.V16B());
1172 break;
1173 case DataType::Type::kInt32: {
1174 DCHECK_EQ(4u, instruction->GetVectorLength());
1175 VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
1176 VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
1177 __ Sxtl(tmp1.V8H(), left.V8B());
1178 __ Sxtl(tmp2.V8H(), right.V8B());
1179 __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
1180 __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
1181 __ Sxtl2(tmp1.V8H(), left.V16B());
1182 __ Sxtl2(tmp2.V8H(), right.V16B());
1183 __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
1184 __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
1185 break;
1186 }
1187 case DataType::Type::kInt64: {
1188 DCHECK_EQ(2u, instruction->GetVectorLength());
1189 VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
1190 VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
1191 VRegister tmp3 = VRegisterFrom(locations->GetTemp(2));
1192 VRegister tmp4 = VRegisterFrom(locations->GetTemp(3));
1193 __ Sxtl(tmp1.V8H(), left.V8B());
1194 __ Sxtl(tmp2.V8H(), right.V8B());
1195 __ Sxtl(tmp3.V4S(), tmp1.V4H());
1196 __ Sxtl(tmp4.V4S(), tmp2.V4H());
1197 __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
1198 __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
1199 __ Sxtl2(tmp3.V4S(), tmp1.V8H());
1200 __ Sxtl2(tmp4.V4S(), tmp2.V8H());
1201 __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
1202 __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
1203 __ Sxtl2(tmp1.V8H(), left.V16B());
1204 __ Sxtl2(tmp2.V8H(), right.V16B());
1205 __ Sxtl(tmp3.V4S(), tmp1.V4H());
1206 __ Sxtl(tmp4.V4S(), tmp2.V4H());
1207 __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
1208 __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
1209 __ Sxtl2(tmp3.V4S(), tmp1.V8H());
1210 __ Sxtl2(tmp4.V4S(), tmp2.V8H());
1211 __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
1212 __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
1213 break;
1214 }
1215 default:
1216 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1217 UNREACHABLE();
1218 }
1219 break;
1220 case DataType::Type::kUint16:
1221 case DataType::Type::kInt16:
1222 DCHECK_EQ(8u, a->GetVectorLength());
1223 switch (instruction->GetPackedType()) {
1224 case DataType::Type::kInt32:
1225 DCHECK_EQ(4u, instruction->GetVectorLength());
1226 __ Sabal(acc.V4S(), left.V4H(), right.V4H());
1227 __ Sabal2(acc.V4S(), left.V8H(), right.V8H());
1228 break;
1229 case DataType::Type::kInt64: {
1230 DCHECK_EQ(2u, instruction->GetVectorLength());
1231 VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
1232 VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
1233 __ Sxtl(tmp1.V4S(), left.V4H());
1234 __ Sxtl(tmp2.V4S(), right.V4H());
1235 __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
1236 __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
1237 __ Sxtl2(tmp1.V4S(), left.V8H());
1238 __ Sxtl2(tmp2.V4S(), right.V8H());
1239 __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
1240 __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
1241 break;
1242 }
1243 default:
1244 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1245 UNREACHABLE();
1246 }
1247 break;
1248 case DataType::Type::kInt32:
1249 DCHECK_EQ(4u, a->GetVectorLength());
1250 switch (instruction->GetPackedType()) {
1251 case DataType::Type::kInt32: {
1252 DCHECK_EQ(4u, instruction->GetVectorLength());
1253 VRegister tmp = VRegisterFrom(locations->GetTemp(0));
1254 __ Sub(tmp.V4S(), left.V4S(), right.V4S());
1255 __ Abs(tmp.V4S(), tmp.V4S());
1256 __ Add(acc.V4S(), acc.V4S(), tmp.V4S());
1257 break;
1258 }
1259 case DataType::Type::kInt64:
1260 DCHECK_EQ(2u, instruction->GetVectorLength());
1261 __ Sabal(acc.V2D(), left.V2S(), right.V2S());
1262 __ Sabal2(acc.V2D(), left.V4S(), right.V4S());
1263 break;
1264 default:
1265 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1266 UNREACHABLE();
1267 }
1268 break;
1269 case DataType::Type::kInt64:
1270 DCHECK_EQ(2u, a->GetVectorLength());
1271 switch (instruction->GetPackedType()) {
1272 case DataType::Type::kInt64: {
1273 DCHECK_EQ(2u, instruction->GetVectorLength());
1274 VRegister tmp = VRegisterFrom(locations->GetTemp(0));
1275 __ Sub(tmp.V2D(), left.V2D(), right.V2D());
1276 __ Abs(tmp.V2D(), tmp.V2D());
1277 __ Add(acc.V2D(), acc.V2D(), tmp.V2D());
1278 break;
1279 }
1280 default:
1281 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1282 UNREACHABLE();
1283 }
1284 break;
1285 default:
1286 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1287 }
1288 }
1289
VisitVecDotProd(HVecDotProd * instruction)1290 void LocationsBuilderARM64::VisitVecDotProd(HVecDotProd* instruction) {
1291 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
1292 DCHECK(instruction->GetPackedType() == DataType::Type::kInt32);
1293 locations->SetInAt(0, Location::RequiresFpuRegister());
1294 locations->SetInAt(1, Location::RequiresFpuRegister());
1295 locations->SetInAt(2, Location::RequiresFpuRegister());
1296 locations->SetOut(Location::SameAsFirstInput());
1297
1298 // For Int8 and Uint8 general case we need a temp register.
1299 if ((DataType::Size(instruction->InputAt(1)->AsVecOperation()->GetPackedType()) == 1) &&
1300 !ShouldEmitDotProductInstructions(codegen_)) {
1301 locations->AddTemp(Location::RequiresFpuRegister());
1302 }
1303 }
1304
// Emits acc += dotprod(left, right) for an Int32 accumulator. Uses UDOT/SDOT
// when dot-product instructions are enabled; otherwise falls back to widening
// multiply + widening add sequences through a temp register.
void InstructionCodeGeneratorARM64::VisitVecDotProd(HVecDotProd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  VRegister acc = VRegisterFrom(locations->InAt(0));
  VRegister left = VRegisterFrom(locations->InAt(1));
  VRegister right = VRegisterFrom(locations->InAt(2));
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32);
  DCHECK_EQ(4u, instruction->GetVectorLength());

  // Dispatch on the width of the operand lanes (1 byte or 2 bytes).
  size_t inputs_data_size = DataType::Size(a->GetPackedType());
  switch (inputs_data_size) {
    case 1u: {
      DCHECK_EQ(16u, a->GetVectorLength());
      if (instruction->IsZeroExtending()) {
        if (ShouldEmitDotProductInstructions(codegen_)) {
          __ Udot(acc.V4S(), left.V16B(), right.V16B());
        } else {
          // Widening fallback: multiply byte halves to 16 bits, then add both
          // 16-bit halves into the 32-bit accumulator lanes.
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          __ Umull(tmp.V8H(), left.V8B(), right.V8B());
          __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());

          __ Umull2(tmp.V8H(), left.V16B(), right.V16B());
          __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
        }
      } else {
        if (ShouldEmitDotProductInstructions(codegen_)) {
          __ Sdot(acc.V4S(), left.V16B(), right.V16B());
        } else {
          // Signed variant of the widening fallback above.
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          __ Smull(tmp.V8H(), left.V8B(), right.V8B());
          __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());

          __ Smull2(tmp.V8H(), left.V16B(), right.V16B());
          __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
        }
      }
      break;
    }
    case 2u:
      DCHECK_EQ(8u, a->GetVectorLength());
      // 16-bit lanes: widening multiply-accumulate, no temp needed.
      if (instruction->IsZeroExtending()) {
        __ Umlal(acc.V4S(), left.V4H(), right.V4H());
        __ Umlal2(acc.V4S(), left.V8H(), right.V8H());
      } else {
        __ Smlal(acc.V4S(), left.V4H(), right.V4H());
        __ Smlal2(acc.V4S(), left.V8H(), right.V8H());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size;
  }
}
1365
1366 // Helper to set up locations for vector memory operations.
CreateVecMemLocations(ArenaAllocator * allocator,HVecMemoryOperation * instruction,bool is_load)1367 static void CreateVecMemLocations(ArenaAllocator* allocator,
1368 HVecMemoryOperation* instruction,
1369 bool is_load) {
1370 LocationSummary* locations = new (allocator) LocationSummary(instruction);
1371 switch (instruction->GetPackedType()) {
1372 case DataType::Type::kBool:
1373 case DataType::Type::kUint8:
1374 case DataType::Type::kInt8:
1375 case DataType::Type::kUint16:
1376 case DataType::Type::kInt16:
1377 case DataType::Type::kInt32:
1378 case DataType::Type::kInt64:
1379 case DataType::Type::kFloat32:
1380 case DataType::Type::kFloat64:
1381 locations->SetInAt(0, Location::RequiresRegister());
1382 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
1383 if (is_load) {
1384 locations->SetOut(Location::RequiresFpuRegister());
1385 } else {
1386 locations->SetInAt(2, Location::RequiresFpuRegister());
1387 }
1388 break;
1389 default:
1390 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1391 UNREACHABLE();
1392 }
1393 }
1394
// Helper to set up locations for vector memory operations. Returns the memory operand and,
// if used, sets the output parameter scratch to a temporary register used in this operand,
// so that the client can release it right after the memory operand use.
MemOperand InstructionCodeGeneratorARM64::VecAddress(
    HVecMemoryOperation* instruction,
    UseScratchRegisterScope* temps_scope,
    size_t size,
    bool is_string_char_at,
    /*out*/ Register* scratch) {
  LocationSummary* locations = instruction->GetLocations();
  Register base = InputRegisterAt(instruction, 0);

  // Base + index is already precomputed by an HIntermediateAddressIndex node;
  // no extra offset or scratch register is needed in that case.
  if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
    DCHECK(!is_string_char_at);
    return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
  }

  Location index = locations->InAt(1);
  // Data starts after the String/Array object header.
  uint32_t offset = is_string_char_at
      ? mirror::String::ValueOffset().Uint32Value()
      : mirror::Array::DataOffset(size).Uint32Value();
  size_t shift = ComponentSizeShiftWidth(size);

  // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
  DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());

  if (index.IsConstant()) {
    // Constant index: fold it into the immediate offset.
    offset += Int64FromLocation(index) << shift;
    return HeapOperand(base, offset);
  } else {
    // Variable index: add the scaled index into a scratch register, which the
    // caller must release after the returned operand is used.
    *scratch = temps_scope->AcquireSameSizeAs(base);
    __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
    return HeapOperand(*scratch, offset);
  }
}
1430
void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) {
  // Loads use the shared memory location setup with an FPU output.
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
}
1434
// Emits a full-width vector load; String.charAt loads additionally branch on
// the string compression flag to widen compressed (8-bit) characters.
void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  VRegister reg = VRegisterFrom(locations->Out());
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;  // set by VecAddress when a variable index needs one

  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      // Special handling of compressed/uncompressed string load.
      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
        vixl::aarch64::Label uncompressed_load, done;
        // Test compression bit.
        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                      "Expecting 0=compressed, 1=uncompressed");
        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
        Register length = temps.AcquireW();
        __ Ldr(length, HeapOperand(InputRegisterAt(instruction, 0), count_offset));
        __ Tbnz(length.W(), 0, &uncompressed_load);
        temps.Release(length);  // no longer needed
        // Zero extend 8 compressed bytes into 8 chars.
        // Note: element size 1 here, since compressed data holds bytes.
        __ Ldr(DRegisterFrom(locations->Out()).V8B(),
               VecAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
        __ Uxtl(reg.V8H(), reg.V8B());
        __ B(&done);
        if (scratch.IsValid()) {
          temps.Release(scratch);  // if used, no longer needed
        }
        // Load 8 direct uncompressed chars.
        __ Bind(&uncompressed_load);
        __ Ldr(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
        __ Bind(&done);
        return;
      }
      FALLTHROUGH_INTENDED;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
1488
void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) {
  // Stores use the shared memory location setup with the value as third input.
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
}
1492
VisitVecStore(HVecStore * instruction)1493 void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
1494 LocationSummary* locations = instruction->GetLocations();
1495 size_t size = DataType::Size(instruction->GetPackedType());
1496 VRegister reg = VRegisterFrom(locations->InAt(2));
1497 UseScratchRegisterScope temps(GetVIXLAssembler());
1498 Register scratch;
1499
1500 switch (instruction->GetPackedType()) {
1501 case DataType::Type::kBool:
1502 case DataType::Type::kUint8:
1503 case DataType::Type::kInt8:
1504 case DataType::Type::kUint16:
1505 case DataType::Type::kInt16:
1506 case DataType::Type::kInt32:
1507 case DataType::Type::kFloat32:
1508 case DataType::Type::kInt64:
1509 case DataType::Type::kFloat64:
1510 DCHECK_LE(2u, instruction->GetVectorLength());
1511 DCHECK_LE(instruction->GetVectorLength(), 16u);
1512 __ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
1513 break;
1514 default:
1515 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1516 UNREACHABLE();
1517 }
1518 }
1519
1520 #undef __
1521
1522 } // namespace arm64
1523 } // namespace art
1524