/*
 * Copyright (C) 2020 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm64.h"

#include "arch/arm64/instruction_set_features_arm64.h"
#include "base/bit_utils_iterator.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

namespace art {
namespace arm64 {

using helpers::DRegisterFrom;
using helpers::HeapOperand;
using helpers::InputRegisterAt;
using helpers::Int64FromLocation;
using helpers::LocationFrom;
using helpers::OutputRegister;
using helpers::QRegisterFrom;
using helpers::StackOperandFrom;
using helpers::SveStackOperandFrom;
using helpers::VRegisterFrom;
using helpers::XRegisterFrom;
using helpers::ZRegisterFrom;

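// Shorthand so that code emission below reads like assembly: "__ Dup(...)"
// expands to "GetVIXLAssembler()->Dup(...)".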
#define __ GetVIXLAssembler()->

// Returns whether the value of the constant can be directly encoded into the
// instruction as an immediate.
static bool SVECanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
  if (instr->IsVecReplicateScalar()) {
    if (constant->IsLongConstant()) {
      return false;
    } else if (constant->IsFloatConstant()) {
      return vixl::aarch64::Assembler::IsImmFP32(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsDoubleConstant()) {
      return vixl::aarch64::Assembler::IsImmFP64(constant->AsDoubleConstant()->GetValue());
    }
    // TODO: Make use of shift part of DUP instruction.
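    // E.g. a replicated int32 constant 127 is encodable below ("dup z0.s, #127"),
    // while 256 is rejected even though DUP (immediate) could represent it with
    // its optional "lsl #8" shift.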
    int64_t value = CodeGenerator::GetInt64ValueOf(constant);
    return IsInt<8>(value);
  }

  return false;
}

// Returns
// - constant location - if 'constant' is an actual constant and its value can be
//   encoded into the instruction.
// - register location otherwise.
inline Location SVEEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) {
  if (constant->IsConstant()
      && SVECanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
    return Location::ConstantLocation(constant->AsConstant());
  }

  return Location::RequiresRegister();
}

void InstructionCodeGeneratorARM64Sve::ValidateVectorLength(HVecOperation* instr) const {
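  // E.g. on a 16-byte SIMD register a kInt32 vector operation is expected to
  // have exactly 4 lanes.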
  DCHECK_EQ(DataType::Size(instr->GetPackedType()) * instr->GetVectorLength(),
            codegen_->GetSIMDRegisterWidth());
}

void LocationsBuilderARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  HInstruction* input = instruction->InputAt(0);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, SVEEncodableConstantOrRegister(input, instruction));
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      if (input->IsConstant() &&
          SVECanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
        locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(0, Location::RequiresFpuRegister());
        locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  Location src_loc = locations->InAt(0);
  const ZRegister dst = ZRegisterFrom(locations->Out());
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnB(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnB(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnH(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnH(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt32:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnS(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnS(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt64:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnD(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnD(), XRegisterFrom(src_loc));
      }
      break;
    case DataType::Type::kFloat32:
      if (src_loc.IsConstant()) {
        __ Fdup(dst.VnS(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
      } else {
        __ Dup(dst.VnS(), ZRegisterFrom(src_loc).VnS(), 0);
      }
      break;
    case DataType::Type::kFloat64:
      if (src_loc.IsConstant()) {
        __ Fdup(dst.VnD(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
      } else {
        __ Dup(dst.VnD(), ZRegisterFrom(src_loc).VnD(), 0);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Sve::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const VRegister src = VRegisterFrom(locations->InAt(0));
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      __ Umov(OutputRegister(instruction), src.V4S(), 0);
      break;
    case DataType::Type::kInt64:
      __ Umov(OutputRegister(instruction), src.V2D(), 0);
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
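      // The kBool case of VecNot (see below) overwrites its destination with Dup
      // before reading the source, so the output must not alias the input.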
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(),
                        instruction->IsVecNot() ? Location::kOutputOverlap
                                                : Location::kNoOutputOverlap);
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecReduce(HVecReduce* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecReduce(HVecReduce* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const VRegister dst = DRegisterFrom(locations->Out());
  const PRegister p_reg = LoopPReg();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          __ Saddv(dst.S(), p_reg, src.VnS());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD instruction";
          UNREACHABLE();
      }
      break;
    case DataType::Type::kInt64:
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          __ Uaddv(dst.D(), p_reg, src.VnD());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD instruction";
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecCnv(HVecCnv* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecCnv(HVecCnv* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  DataType::Type from = instruction->GetInputType();
  DataType::Type to = instruction->GetResultType();
  ValidateVectorLength(instruction);
  if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
    __ Scvtf(dst.VnS(), p_reg, src.VnS());
  } else {
    LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
  }
}

void LocationsBuilderARM64Sve::VisitVecNeg(HVecNeg* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecNeg(HVecNeg* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Neg(dst.VnB(), p_reg, src.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Neg(dst.VnH(), p_reg, src.VnH());
      break;
    case DataType::Type::kInt32:
      __ Neg(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kInt64:
      __ Neg(dst.VnD(), p_reg, src.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fneg(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fneg(dst.VnD(), p_reg, src.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecAbs(HVecAbs* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecAbs(HVecAbs* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt8:
      __ Abs(dst.VnB(), p_reg, src.VnB());
      break;
    case DataType::Type::kInt16:
      __ Abs(dst.VnH(), p_reg, src.VnH());
      break;
    case DataType::Type::kInt32:
      __ Abs(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kInt64:
      __ Abs(dst.VnD(), p_reg, src.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fabs(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fabs(dst.VnD(), p_reg, src.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecNot(HVecNot* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecNot(HVecNot* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:  // special case boolean-not
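      // Booleans are stored as 0/1, so !x is computed as x ^ 1.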
      __ Dup(dst.VnB(), 1);
      __ Eor(dst.VnB(), p_reg, dst.VnB(), src.VnB());
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Not(dst.VnB(), p_reg, src.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Not(dst.VnH(), p_reg, src.VnH());
      break;
    case DataType::Type::kInt32:
      __ Not(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kInt64:
      __ Not(dst.VnD(), p_reg, src.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector binary operations.
static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecAdd(HVecAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecAdd(HVecAdd* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Add(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Add(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
      __ Add(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
      __ Add(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fadd(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS(), StrictNaNPropagation);
      break;
    case DataType::Type::kFloat64:
      __ Fadd(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD(), StrictNaNPropagation);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecSub(HVecSub* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecSub(HVecSub* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Sub(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Sub(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
      __ Sub(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
      __ Sub(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fsub(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fsub(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecMul(HVecMul* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecMul(HVecMul* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Mul(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Mul(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
      __ Mul(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
      __ Mul(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fmul(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS(), StrictNaNPropagation);
      break;
    case DataType::Type::kFloat64:
      __ Fmul(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD(), StrictNaNPropagation);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecDiv(HVecDiv* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecDiv(HVecDiv* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);

  // Note: VIXL guarantees StrictNaNPropagation for Fdiv.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kFloat32:
      __ Fdiv(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fdiv(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecMin(HVecMin* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecMin(HVecMin* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecMax(HVecMax* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecMax(HVecMax* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecAnd(HVecAnd* instruction) {
  // TODO: Allow constants supported by BIC (vector, immediate).
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecAnd(HVecAnd* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ And(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ And(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ And(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ And(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecAndNot(HVecAndNot* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}

void InstructionCodeGeneratorARM64Sve::VisitVecAndNot(HVecAndNot* instruction) {
  // TODO: Use BIC (vector, register).
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}

void LocationsBuilderARM64Sve::VisitVecOr(HVecOr* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecOr(HVecOr* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Orr(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Orr(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Orr(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Orr(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecXor(HVecXor* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecXor(HVecXor* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Eor(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Eor(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Eor(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Eor(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector shift operations.
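// Only constant shift distances are supported: the amount is taken from the
// instruction's second input and encoded as an immediate.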
static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecShl(HVecShl* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecShl(HVecShl* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Lsl(dst.VnB(), p_reg, lhs.VnB(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Lsl(dst.VnH(), p_reg, lhs.VnH(), value);
      break;
    case DataType::Type::kInt32:
      __ Lsl(dst.VnS(), p_reg, lhs.VnS(), value);
      break;
    case DataType::Type::kInt64:
      __ Lsl(dst.VnD(), p_reg, lhs.VnD(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecShr(HVecShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecShr(HVecShr* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Asr(dst.VnB(), p_reg, lhs.VnB(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Asr(dst.VnH(), p_reg, lhs.VnH(), value);
      break;
    case DataType::Type::kInt32:
      __ Asr(dst.VnS(), p_reg, lhs.VnS(), value);
      break;
    case DataType::Type::kInt64:
      __ Asr(dst.VnD(), p_reg, lhs.VnD(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecUShr(HVecUShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecUShr(HVecUShr* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Lsr(dst.VnB(), p_reg, lhs.VnB(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Lsr(dst.VnH(), p_reg, lhs.VnH(), value);
      break;
    case DataType::Type::kInt32:
      __ Lsr(dst.VnS(), p_reg, lhs.VnS(), value);
      break;
    case DataType::Type::kInt64:
      __ Lsr(dst.VnD(), p_reg, lhs.VnD(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);

  DCHECK_EQ(2u, instruction->InputCount());  // only one input currently implemented + predicate.

  HInstruction* input = instruction->InputAt(0);
  bool is_zero = IsZeroBitPattern(input);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Sve::VisitVecSetScalars(HVecSetScalars* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister z_dst = ZRegisterFrom(locations->Out());

  DCHECK_EQ(2u, instruction->InputCount());  // only one input currently implemented + predicate.

  // Zero out all other elements first.
  __ Dup(z_dst.VnB(), 0);

  // Shorthand for any type of zero.
  if (IsZeroBitPattern(instruction->InputAt(0))) {
    return;
  }
  ValidateVectorLength(instruction);

  const VRegister dst = VRegisterFrom(locations->Out());

  // Set required elements.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt32:
      __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt64:
      __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector accumulations.
static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetInAt(2, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
}

// Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
// 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
// However, the vector MultiplyAccumulate instruction is not affected.
void InstructionCodeGeneratorARM64Sve::VisitVecMultiplyAccumulate(
    HVecMultiplyAccumulate* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister acc = ZRegisterFrom(locations->InAt(0));
  const ZRegister left = ZRegisterFrom(locations->InAt(1));
  const ZRegister right = ZRegisterFrom(locations->InAt(2));
  const PRegisterM p_reg = LoopPReg().Merging();

  DCHECK(locations->InAt(0).Equals(locations->Out()));
  ValidateVectorLength(instruction);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.VnB(), p_reg, acc.VnB(), left.VnB(), right.VnB());
      } else {
        __ Mls(acc.VnB(), p_reg, acc.VnB(), left.VnB(), right.VnB());
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.VnH(), p_reg, acc.VnH(), left.VnH(), right.VnH());
      } else {
        __ Mls(acc.VnH(), p_reg, acc.VnH(), left.VnH(), right.VnH());
      }
      break;
    case DataType::Type::kInt32:
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.VnS(), p_reg, acc.VnS(), left.VnS(), right.VnS());
      } else {
        __ Mls(acc.VnS(), p_reg, acc.VnS(), left.VnS(), right.VnS());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecDotProd(HVecDotProd* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  DCHECK(instruction->GetPackedType() == DataType::Type::kInt32);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetInAt(2, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());

  locations->AddTemp(Location::RequiresFpuRegister());
}

void InstructionCodeGeneratorARM64Sve::VisitVecDotProd(HVecDotProd* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  const ZRegister acc = ZRegisterFrom(locations->InAt(0));
  const ZRegister left = ZRegisterFrom(locations->InAt(1));
  const ZRegister right = ZRegisterFrom(locations->InAt(2));
  const PRegisterM p_reg = LoopPReg().Merging();
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32);
  ValidateVectorLength(instruction);

  size_t inputs_data_size = DataType::Size(a->GetPackedType());
  switch (inputs_data_size) {
    case 1u: {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      const ZRegister tmp0 = temps.AcquireZ();
      const ZRegister tmp1 = ZRegisterFrom(locations->GetTemp(0));

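      // Sdot/Udot below are unpredicated, so the inactive lanes are zeroed first:
      // tmp0/tmp1 receive the active elements of left/right and zero elsewhere.
      // Each 32-bit lane of acc then accumulates a 4-way dot product of the four
      // corresponding byte elements.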
      __ Dup(tmp1.VnB(), 0u);
      __ Sel(tmp0.VnB(), p_reg, left.VnB(), tmp1.VnB());
      __ Sel(tmp1.VnB(), p_reg, right.VnB(), tmp1.VnB());
      if (instruction->IsZeroExtending()) {
        __ Udot(acc.VnS(), acc.VnS(), tmp0.VnB(), tmp1.VnB());
      } else {
        __ Sdot(acc.VnS(), acc.VnS(), tmp0.VnB(), tmp1.VnB());
      }
      break;
    }
    default:
      LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size;
  }
}

// Helper to set up locations for vector memory operations.
static void CreateVecMemLocations(ArenaAllocator* allocator,
                                  HVecMemoryOperation* instruction,
                                  bool is_load) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
      if (is_load) {
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(2, Location::RequiresFpuRegister());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecLoad(HVecLoad* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
}

void InstructionCodeGeneratorARM64Sve::VisitVecLoad(HVecLoad* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  const ZRegister reg = ZRegisterFrom(locations->Out());
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;
  const PRegisterZ p_reg = LoopPReg().Zeroing();
  ValidateVectorLength(instruction);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
    case DataType::Type::kUint16:
      __ Ld1h(reg.VnH(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Ld1b(reg.VnB(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Ld1w(reg.VnS(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Ld1d(reg.VnD(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecStore(HVecStore* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
}

void InstructionCodeGeneratorARM64Sve::VisitVecStore(HVecStore* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  const ZRegister reg = ZRegisterFrom(locations->InAt(2));
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;
  const PRegisterZ p_reg = LoopPReg().Zeroing();
  ValidateVectorLength(instruction);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ St1b(reg.VnB(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ St1h(reg.VnH(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ St1w(reg.VnS(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ St1d(reg.VnD(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  DCHECK(instruction->InputAt(0)->IsIntConstant());
  locations->SetInAt(0, Location::NoLocation());
  locations->SetOut(Location::NoLocation());
}

void InstructionCodeGeneratorARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) {
  // Instruction is not predicated, see nodes_vector.h.
  DCHECK(!instruction->IsPredicated());
  const PRegister p_reg = LoopPReg();

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Ptrue(p_reg.VnB(), vixl::aarch64::SVE_ALL);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Ptrue(p_reg.VnH(), vixl::aarch64::SVE_ALL);
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Ptrue(p_reg.VnS(), vixl::aarch64::SVE_ALL);
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Ptrue(p_reg.VnD(), vixl::aarch64::SVE_ALL);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  // The instruction doesn't really need a core register as out location; this is a hack
  // to work around the absence of support for vector predicates in register allocation.
  //
  // Semantically, the out location of this instruction and the predicate input locations of
  // its users should be a fixed predicate register (similar to
  // Location::RegisterLocation(int reg)). But the register allocator (RA) doesn't support
  // SIMD regs (e.g. predicate), so LoopPReg() is used explicitly without exposing it
  // to the RA.
  //
  // To make the RA happy, Location::NoLocation() was used for all the vector instructions'
  // predicate inputs; but for the PredSetOperations (e.g. VecPredWhile) Location::NoLocation()
  // can't be used without changes to the RA - "ssa_liveness_analysis.cc] Check failed:
  // input->IsEmittedAtUseSite()" would fire.
  //
  // Using a core register as a hack is the easiest way to tackle this problem. The RA will
  // block one core register for the loop without actually using it; this should not be
  // a performance issue as a SIMD loop operates mainly on SIMD registers.
  //
  // TODO: Support SIMD types in the register allocator.
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) {
  // Instruction is not predicated, see nodes_vector.h.
  DCHECK(!instruction->IsPredicated());
  // Current implementation of predicated loop execution only supports kLO condition.
  DCHECK(instruction->GetCondKind() == HVecPredWhile::CondKind::kLO);
  Register left = InputRegisterAt(instruction, 0);
  Register right = InputRegisterAt(instruction, 1);

  DCHECK_EQ(codegen_->GetSIMDRegisterWidth() % instruction->GetVectorLength(), 0u);

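  // The ratio below is the element size in bytes and selects the predicate lane
  // size: e.g. a 16-byte SIMD register with 4 packed int32 elements gives
  // 16 / 4 = 4 bytes per element, so Whilelo operates on S lanes.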
  switch (codegen_->GetSIMDRegisterWidth() / instruction->GetVectorLength()) {
    case 1u:
      __ Whilelo(LoopPReg().VnB(), left, right);
      break;
    case 2u:
      __ Whilelo(LoopPReg().VnH(), left, right);
      break;
    case 4u:
      __ Whilelo(LoopPReg().VnS(), left, right);
      break;
    case 8u:
      __ Whilelo(LoopPReg().VnD(), left, right);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecPredCondition(HVecPredCondition* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  locations->SetInAt(0, Location::NoLocation());
  // Result of the operation - a boolean value in a core register.
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorARM64Sve::VisitVecPredCondition(HVecPredCondition* instruction) {
  // Instruction is not predicated, see nodes_vector.h.
  DCHECK(!instruction->IsPredicated());
  Register reg = OutputRegister(instruction);
  // Currently VecPredCondition is only used as part of vectorized loop check condition
  // evaluation.
  DCHECK(instruction->GetPCondKind() == HVecPredCondition::PCondKind::kNFirst);
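  // A predicate-setting While instruction sets the N flag to the SVE "first" flag,
  // so "pl" (N == 0) yields 1 exactly when the first predicate element is inactive.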
  __ Cset(reg, pl);
}

Location InstructionCodeGeneratorARM64Sve::AllocateSIMDScratchLocation(
    vixl::aarch64::UseScratchRegisterScope* scope) {
  return LocationFrom(scope->AcquireZ());
}

void InstructionCodeGeneratorARM64Sve::FreeSIMDScratchLocation(Location loc,
    vixl::aarch64::UseScratchRegisterScope* scope) {
  scope->Release(ZRegisterFrom(loc));
}

void InstructionCodeGeneratorARM64Sve::LoadSIMDRegFromStack(Location destination,
                                                            Location source) {
  __ Ldr(ZRegisterFrom(destination), SveStackOperandFrom(source));
}

void InstructionCodeGeneratorARM64Sve::MoveSIMDRegToSIMDReg(Location destination,
                                                            Location source) {
  __ Mov(ZRegisterFrom(destination), ZRegisterFrom(source));
}

void InstructionCodeGeneratorARM64Sve::MoveToSIMDStackSlot(Location destination,
                                                           Location source) {
  DCHECK(destination.IsSIMDStackSlot());

  if (source.IsFpuRegister()) {
    __ Str(ZRegisterFrom(source), SveStackOperandFrom(destination));
  } else {
    DCHECK(source.IsSIMDStackSlot());
    UseScratchRegisterScope temps(GetVIXLAssembler());
    if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
      // Very rare situation, only when there are cycles in the ParallelMoveResolver graph.
      const Register temp = temps.AcquireX();
      DCHECK_EQ(codegen_->GetSIMDRegisterWidth() % kArm64WordSize, 0u);
      // Emit a number of LDR/STR (XRegister, 64-bit) to cover the whole SIMD register size
      // when copying a stack slot.
      for (size_t offset = 0, e = codegen_->GetSIMDRegisterWidth();
           offset < e;
           offset += kArm64WordSize) {
        __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + offset));
        __ Str(temp, MemOperand(sp, destination.GetStackIndex() + offset));
      }
    } else {
      const ZRegister temp = temps.AcquireZ();
      __ Ldr(temp, SveStackOperandFrom(source));
      __ Str(temp, SveStackOperandFrom(destination));
    }
  }
}

template <bool is_save>
void SaveRestoreLiveRegistersHelperSveImpl(CodeGeneratorARM64* codegen,
                                           LocationSummary* locations,
                                           int64_t spill_offset) {
  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
  DCHECK(helpers::ArtVixlRegCodeCoherentForRegSet(core_spills,
                                                  codegen->GetNumberOfCoreRegisters(),
                                                  fp_spills,
                                                  codegen->GetNumberOfFloatingPointRegisters()));
  MacroAssembler* masm = codegen->GetVIXLAssembler();
  Register base = masm->StackPointer();

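  // The slow-path spill area is laid out with the core registers first and the
  // FP/SIMD registers immediately after them.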
  CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
  int64_t core_spill_size = core_list.GetTotalSizeInBytes();
  int64_t fp_spill_offset = spill_offset + core_spill_size;

  if (codegen->GetGraph()->HasSIMD()) {
    if (is_save) {
      masm->StoreCPURegList(core_list, MemOperand(base, spill_offset));
    } else {
      masm->LoadCPURegList(core_list, MemOperand(base, spill_offset));
    }
    codegen->GetAssembler()->SaveRestoreZRegisterList<is_save>(fp_spills, fp_spill_offset);
    return;
  }

  // Case when we only need to save/restore FP registers as D-registers.
  DCHECK(!codegen->GetGraph()->HasSIMD());
  DCHECK_LE(codegen->GetSlowPathFPWidth(), kDRegSizeInBytes);
  CPURegList fp_list = CPURegList(CPURegister::kVRegister, kDRegSize, fp_spills);
  if (is_save) {
    masm->StoreCPURegList(core_list, MemOperand(base, spill_offset));
    masm->StoreCPURegList(fp_list, MemOperand(base, fp_spill_offset));
  } else {
    masm->LoadCPURegList(core_list, MemOperand(base, spill_offset));
    masm->LoadCPURegList(fp_list, MemOperand(base, fp_spill_offset));
  }
}

void InstructionCodeGeneratorARM64Sve::SaveLiveRegistersHelper(LocationSummary* locations,
                                                               int64_t spill_offset) {
  SaveRestoreLiveRegistersHelperSveImpl</* is_save= */ true>(codegen_, locations, spill_offset);
}

void InstructionCodeGeneratorARM64Sve::RestoreLiveRegistersHelper(LocationSummary* locations,
                                                                  int64_t spill_offset) {
  SaveRestoreLiveRegistersHelperSveImpl</* is_save= */ false>(codegen_, locations, spill_offset);
}

#undef __

}  // namespace arm64
}  // namespace art