1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_x86_64.h"
18
19 #include "mirror/array-inl.h"
20 #include "mirror/string.h"
21
22 namespace art {
23 namespace x86_64 {
24
25 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
26 #define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT
27
VisitVecReplicateScalar(HVecReplicateScalar * instruction)28 void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
29 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
30 HInstruction* input = instruction->InputAt(0);
31 bool is_zero = IsZeroBitPattern(input);
32 switch (instruction->GetPackedType()) {
33 case DataType::Type::kBool:
34 case DataType::Type::kUint8:
35 case DataType::Type::kInt8:
36 case DataType::Type::kUint16:
37 case DataType::Type::kInt16:
38 case DataType::Type::kInt32:
39 case DataType::Type::kInt64:
40 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
41 : Location::RequiresRegister());
42 locations->SetOut(Location::RequiresFpuRegister());
43 break;
44 case DataType::Type::kFloat32:
45 case DataType::Type::kFloat64:
46 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
47 : Location::RequiresFpuRegister());
48 locations->SetOut(is_zero ? Location::RequiresFpuRegister()
49 : Location::SameAsFirstInput());
50 break;
51 default:
52 LOG(FATAL) << "Unsupported SIMD type";
53 UNREACHABLE();
54 }
55 }
56
VisitVecReplicateScalar(HVecReplicateScalar * instruction)57 void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
58 LocationSummary* locations = instruction->GetLocations();
59 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
60
61 // Shorthand for any type of zero.
62 if (IsZeroBitPattern(instruction->InputAt(0))) {
63 __ xorps(dst, dst);
64 return;
65 }
66
67 switch (instruction->GetPackedType()) {
68 case DataType::Type::kBool:
69 case DataType::Type::kUint8:
70 case DataType::Type::kInt8:
71 DCHECK_EQ(16u, instruction->GetVectorLength());
72 __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
73 __ punpcklbw(dst, dst);
74 __ punpcklwd(dst, dst);
75 __ pshufd(dst, dst, Immediate(0));
76 break;
77 case DataType::Type::kUint16:
78 case DataType::Type::kInt16:
79 DCHECK_EQ(8u, instruction->GetVectorLength());
80 __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
81 __ punpcklwd(dst, dst);
82 __ pshufd(dst, dst, Immediate(0));
83 break;
84 case DataType::Type::kInt32:
85 DCHECK_EQ(4u, instruction->GetVectorLength());
86 __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
87 __ pshufd(dst, dst, Immediate(0));
88 break;
89 case DataType::Type::kInt64:
90 DCHECK_EQ(2u, instruction->GetVectorLength());
91 __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ true);
92 __ punpcklqdq(dst, dst);
93 break;
94 case DataType::Type::kFloat32:
95 DCHECK_EQ(4u, instruction->GetVectorLength());
96 DCHECK(locations->InAt(0).Equals(locations->Out()));
97 __ shufps(dst, dst, Immediate(0));
98 break;
99 case DataType::Type::kFloat64:
100 DCHECK_EQ(2u, instruction->GetVectorLength());
101 DCHECK(locations->InAt(0).Equals(locations->Out()));
102 __ shufpd(dst, dst, Immediate(0));
103 break;
104 default:
105 LOG(FATAL) << "Unsupported SIMD type";
106 UNREACHABLE();
107 }
108 }
109
VisitVecExtractScalar(HVecExtractScalar * instruction)110 void LocationsBuilderX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
111 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
112 switch (instruction->GetPackedType()) {
113 case DataType::Type::kBool:
114 case DataType::Type::kUint8:
115 case DataType::Type::kInt8:
116 case DataType::Type::kUint16:
117 case DataType::Type::kInt16:
118 case DataType::Type::kInt32:
119 case DataType::Type::kInt64:
120 locations->SetInAt(0, Location::RequiresFpuRegister());
121 locations->SetOut(Location::RequiresRegister());
122 break;
123 case DataType::Type::kFloat32:
124 case DataType::Type::kFloat64:
125 locations->SetInAt(0, Location::RequiresFpuRegister());
126 locations->SetOut(Location::SameAsFirstInput());
127 break;
128 default:
129 LOG(FATAL) << "Unsupported SIMD type";
130 UNREACHABLE();
131 }
132 }
133
VisitVecExtractScalar(HVecExtractScalar * instruction)134 void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
135 LocationSummary* locations = instruction->GetLocations();
136 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
137 switch (instruction->GetPackedType()) {
138 case DataType::Type::kBool:
139 case DataType::Type::kUint8:
140 case DataType::Type::kInt8:
141 case DataType::Type::kUint16:
142 case DataType::Type::kInt16: // TODO: up to here, and?
143 LOG(FATAL) << "Unsupported SIMD type";
144 UNREACHABLE();
145 case DataType::Type::kInt32:
146 DCHECK_EQ(4u, instruction->GetVectorLength());
147 __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ false);
148 break;
149 case DataType::Type::kInt64:
150 DCHECK_EQ(2u, instruction->GetVectorLength());
151 __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ true);
152 break;
153 case DataType::Type::kFloat32:
154 case DataType::Type::kFloat64:
155 DCHECK_LE(2u, instruction->GetVectorLength());
156 DCHECK_LE(instruction->GetVectorLength(), 4u);
157 DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
158 break;
159 default:
160 LOG(FATAL) << "Unsupported SIMD type";
161 UNREACHABLE();
162 }
163 }
164
165 // Helper to set up locations for vector unary operations.
CreateVecUnOpLocations(ArenaAllocator * allocator,HVecUnaryOperation * instruction)166 static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
167 LocationSummary* locations = new (allocator) LocationSummary(instruction);
168 switch (instruction->GetPackedType()) {
169 case DataType::Type::kBool:
170 case DataType::Type::kUint8:
171 case DataType::Type::kInt8:
172 case DataType::Type::kUint16:
173 case DataType::Type::kInt16:
174 case DataType::Type::kInt32:
175 case DataType::Type::kInt64:
176 case DataType::Type::kFloat32:
177 case DataType::Type::kFloat64:
178 locations->SetInAt(0, Location::RequiresFpuRegister());
179 locations->SetOut(Location::RequiresFpuRegister());
180 break;
181 default:
182 LOG(FATAL) << "Unsupported SIMD type";
183 UNREACHABLE();
184 }
185 }
186
VisitVecReduce(HVecReduce * instruction)187 void LocationsBuilderX86_64::VisitVecReduce(HVecReduce* instruction) {
188 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
189 // Long reduction or min/max require a temporary.
190 if (instruction->GetPackedType() == DataType::Type::kInt64 ||
191 instruction->GetKind() == HVecReduce::kMin ||
192 instruction->GetKind() == HVecReduce::kMax) {
193 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
194 }
195 }
196
VisitVecReduce(HVecReduce * instruction)197 void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) {
198 LocationSummary* locations = instruction->GetLocations();
199 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
200 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
201 switch (instruction->GetPackedType()) {
202 case DataType::Type::kInt32:
203 DCHECK_EQ(4u, instruction->GetVectorLength());
204 switch (instruction->GetKind()) {
205 case HVecReduce::kSum:
206 __ movaps(dst, src);
207 __ phaddd(dst, dst);
208 __ phaddd(dst, dst);
209 break;
210 case HVecReduce::kMin: {
211 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
212 __ movaps(tmp, src);
213 __ movaps(dst, src);
214 __ psrldq(tmp, Immediate(8));
215 __ pminsd(dst, tmp);
216 __ psrldq(tmp, Immediate(4));
217 __ pminsd(dst, tmp);
218 break;
219 }
220 case HVecReduce::kMax: {
221 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
222 __ movaps(tmp, src);
223 __ movaps(dst, src);
224 __ psrldq(tmp, Immediate(8));
225 __ pmaxsd(dst, tmp);
226 __ psrldq(tmp, Immediate(4));
227 __ pmaxsd(dst, tmp);
228 break;
229 }
230 }
231 break;
232 case DataType::Type::kInt64: {
233 DCHECK_EQ(2u, instruction->GetVectorLength());
234 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
235 switch (instruction->GetKind()) {
236 case HVecReduce::kSum:
237 __ movaps(tmp, src);
238 __ movaps(dst, src);
239 __ punpckhqdq(tmp, tmp);
240 __ paddq(dst, tmp);
241 break;
242 case HVecReduce::kMin:
243 case HVecReduce::kMax:
244 LOG(FATAL) << "Unsupported SIMD type";
245 }
246 break;
247 }
248 default:
249 LOG(FATAL) << "Unsupported SIMD type";
250 UNREACHABLE();
251 }
252 }
253
VisitVecCnv(HVecCnv * instruction)254 void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) {
255 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
256 }
257
VisitVecCnv(HVecCnv * instruction)258 void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) {
259 LocationSummary* locations = instruction->GetLocations();
260 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
261 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
262 DataType::Type from = instruction->GetInputType();
263 DataType::Type to = instruction->GetResultType();
264 if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
265 DCHECK_EQ(4u, instruction->GetVectorLength());
266 __ cvtdq2ps(dst, src);
267 } else {
268 LOG(FATAL) << "Unsupported SIMD type";
269 }
270 }
271
VisitVecNeg(HVecNeg * instruction)272 void LocationsBuilderX86_64::VisitVecNeg(HVecNeg* instruction) {
273 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
274 }
275
VisitVecNeg(HVecNeg * instruction)276 void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) {
277 LocationSummary* locations = instruction->GetLocations();
278 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
279 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
280 switch (instruction->GetPackedType()) {
281 case DataType::Type::kUint8:
282 case DataType::Type::kInt8:
283 DCHECK_EQ(16u, instruction->GetVectorLength());
284 __ pxor(dst, dst);
285 __ psubb(dst, src);
286 break;
287 case DataType::Type::kUint16:
288 case DataType::Type::kInt16:
289 DCHECK_EQ(8u, instruction->GetVectorLength());
290 __ pxor(dst, dst);
291 __ psubw(dst, src);
292 break;
293 case DataType::Type::kInt32:
294 DCHECK_EQ(4u, instruction->GetVectorLength());
295 __ pxor(dst, dst);
296 __ psubd(dst, src);
297 break;
298 case DataType::Type::kInt64:
299 DCHECK_EQ(2u, instruction->GetVectorLength());
300 __ pxor(dst, dst);
301 __ psubq(dst, src);
302 break;
303 case DataType::Type::kFloat32:
304 DCHECK_EQ(4u, instruction->GetVectorLength());
305 __ xorps(dst, dst);
306 __ subps(dst, src);
307 break;
308 case DataType::Type::kFloat64:
309 DCHECK_EQ(2u, instruction->GetVectorLength());
310 __ xorpd(dst, dst);
311 __ subpd(dst, src);
312 break;
313 default:
314 LOG(FATAL) << "Unsupported SIMD type";
315 UNREACHABLE();
316 }
317 }
318
VisitVecAbs(HVecAbs * instruction)319 void LocationsBuilderX86_64::VisitVecAbs(HVecAbs* instruction) {
320 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
321 // Integral-abs requires a temporary for the comparison.
322 if (instruction->GetPackedType() == DataType::Type::kInt32) {
323 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
324 }
325 }
326
VisitVecAbs(HVecAbs * instruction)327 void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) {
328 LocationSummary* locations = instruction->GetLocations();
329 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
330 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
331 switch (instruction->GetPackedType()) {
332 case DataType::Type::kInt32: {
333 DCHECK_EQ(4u, instruction->GetVectorLength());
334 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
335 __ movaps(dst, src);
336 __ pxor(tmp, tmp);
337 __ pcmpgtd(tmp, dst);
338 __ pxor(dst, tmp);
339 __ psubd(dst, tmp);
340 break;
341 }
342 case DataType::Type::kFloat32:
343 DCHECK_EQ(4u, instruction->GetVectorLength());
344 __ pcmpeqb(dst, dst); // all ones
345 __ psrld(dst, Immediate(1));
346 __ andps(dst, src);
347 break;
348 case DataType::Type::kFloat64:
349 DCHECK_EQ(2u, instruction->GetVectorLength());
350 __ pcmpeqb(dst, dst); // all ones
351 __ psrlq(dst, Immediate(1));
352 __ andpd(dst, src);
353 break;
354 default:
355 LOG(FATAL) << "Unsupported SIMD type";
356 UNREACHABLE();
357 }
358 }
359
VisitVecNot(HVecNot * instruction)360 void LocationsBuilderX86_64::VisitVecNot(HVecNot* instruction) {
361 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
362 // Boolean-not requires a temporary to construct the 16 x one.
363 if (instruction->GetPackedType() == DataType::Type::kBool) {
364 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
365 }
366 }
367
VisitVecNot(HVecNot * instruction)368 void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) {
369 LocationSummary* locations = instruction->GetLocations();
370 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
371 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
372 switch (instruction->GetPackedType()) {
373 case DataType::Type::kBool: { // special case boolean-not
374 DCHECK_EQ(16u, instruction->GetVectorLength());
375 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
376 __ pxor(dst, dst);
377 __ pcmpeqb(tmp, tmp); // all ones
378 __ psubb(dst, tmp); // 16 x one
379 __ pxor(dst, src);
380 break;
381 }
382 case DataType::Type::kUint8:
383 case DataType::Type::kInt8:
384 case DataType::Type::kUint16:
385 case DataType::Type::kInt16:
386 case DataType::Type::kInt32:
387 case DataType::Type::kInt64:
388 DCHECK_LE(2u, instruction->GetVectorLength());
389 DCHECK_LE(instruction->GetVectorLength(), 16u);
390 __ pcmpeqb(dst, dst); // all ones
391 __ pxor(dst, src);
392 break;
393 case DataType::Type::kFloat32:
394 DCHECK_EQ(4u, instruction->GetVectorLength());
395 __ pcmpeqb(dst, dst); // all ones
396 __ xorps(dst, src);
397 break;
398 case DataType::Type::kFloat64:
399 DCHECK_EQ(2u, instruction->GetVectorLength());
400 __ pcmpeqb(dst, dst); // all ones
401 __ xorpd(dst, src);
402 break;
403 default:
404 LOG(FATAL) << "Unsupported SIMD type";
405 UNREACHABLE();
406 }
407 }
408
409 // Helper to set up locations for vector binary operations.
CreateVecBinOpLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)410 static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
411 LocationSummary* locations = new (allocator) LocationSummary(instruction);
412 switch (instruction->GetPackedType()) {
413 case DataType::Type::kBool:
414 case DataType::Type::kUint8:
415 case DataType::Type::kInt8:
416 case DataType::Type::kUint16:
417 case DataType::Type::kInt16:
418 case DataType::Type::kInt32:
419 case DataType::Type::kInt64:
420 case DataType::Type::kFloat32:
421 case DataType::Type::kFloat64:
422 locations->SetInAt(0, Location::RequiresFpuRegister());
423 locations->SetInAt(1, Location::RequiresFpuRegister());
424 locations->SetOut(Location::SameAsFirstInput());
425 break;
426 default:
427 LOG(FATAL) << "Unsupported SIMD type";
428 UNREACHABLE();
429 }
430 }
431
VisitVecAdd(HVecAdd * instruction)432 void LocationsBuilderX86_64::VisitVecAdd(HVecAdd* instruction) {
433 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
434 }
435
VisitVecAdd(HVecAdd * instruction)436 void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) {
437 LocationSummary* locations = instruction->GetLocations();
438 DCHECK(locations->InAt(0).Equals(locations->Out()));
439 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
440 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
441 switch (instruction->GetPackedType()) {
442 case DataType::Type::kUint8:
443 case DataType::Type::kInt8:
444 DCHECK_EQ(16u, instruction->GetVectorLength());
445 __ paddb(dst, src);
446 break;
447 case DataType::Type::kUint16:
448 case DataType::Type::kInt16:
449 DCHECK_EQ(8u, instruction->GetVectorLength());
450 __ paddw(dst, src);
451 break;
452 case DataType::Type::kInt32:
453 DCHECK_EQ(4u, instruction->GetVectorLength());
454 __ paddd(dst, src);
455 break;
456 case DataType::Type::kInt64:
457 DCHECK_EQ(2u, instruction->GetVectorLength());
458 __ paddq(dst, src);
459 break;
460 case DataType::Type::kFloat32:
461 DCHECK_EQ(4u, instruction->GetVectorLength());
462 __ addps(dst, src);
463 break;
464 case DataType::Type::kFloat64:
465 DCHECK_EQ(2u, instruction->GetVectorLength());
466 __ addpd(dst, src);
467 break;
468 default:
469 LOG(FATAL) << "Unsupported SIMD type";
470 UNREACHABLE();
471 }
472 }
473
VisitVecHalvingAdd(HVecHalvingAdd * instruction)474 void LocationsBuilderX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
475 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
476 }
477
VisitVecHalvingAdd(HVecHalvingAdd * instruction)478 void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
479 LocationSummary* locations = instruction->GetLocations();
480 DCHECK(locations->InAt(0).Equals(locations->Out()));
481 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
482 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
483
484 DCHECK(instruction->IsRounded());
485
486 switch (instruction->GetPackedType()) {
487 case DataType::Type::kUint8:
488 DCHECK_EQ(16u, instruction->GetVectorLength());
489 __ pavgb(dst, src);
490 return;
491 case DataType::Type::kUint16:
492 DCHECK_EQ(8u, instruction->GetVectorLength());
493 __ pavgw(dst, src);
494 return;
495 default:
496 LOG(FATAL) << "Unsupported SIMD type";
497 UNREACHABLE();
498 }
499 }
500
VisitVecSub(HVecSub * instruction)501 void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) {
502 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
503 }
504
VisitVecSub(HVecSub * instruction)505 void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) {
506 LocationSummary* locations = instruction->GetLocations();
507 DCHECK(locations->InAt(0).Equals(locations->Out()));
508 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
509 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
510 switch (instruction->GetPackedType()) {
511 case DataType::Type::kUint8:
512 case DataType::Type::kInt8:
513 DCHECK_EQ(16u, instruction->GetVectorLength());
514 __ psubb(dst, src);
515 break;
516 case DataType::Type::kUint16:
517 case DataType::Type::kInt16:
518 DCHECK_EQ(8u, instruction->GetVectorLength());
519 __ psubw(dst, src);
520 break;
521 case DataType::Type::kInt32:
522 DCHECK_EQ(4u, instruction->GetVectorLength());
523 __ psubd(dst, src);
524 break;
525 case DataType::Type::kInt64:
526 DCHECK_EQ(2u, instruction->GetVectorLength());
527 __ psubq(dst, src);
528 break;
529 case DataType::Type::kFloat32:
530 DCHECK_EQ(4u, instruction->GetVectorLength());
531 __ subps(dst, src);
532 break;
533 case DataType::Type::kFloat64:
534 DCHECK_EQ(2u, instruction->GetVectorLength());
535 __ subpd(dst, src);
536 break;
537 default:
538 LOG(FATAL) << "Unsupported SIMD type";
539 UNREACHABLE();
540 }
541 }
542
VisitVecMul(HVecMul * instruction)543 void LocationsBuilderX86_64::VisitVecMul(HVecMul* instruction) {
544 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
545 }
546
VisitVecMul(HVecMul * instruction)547 void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) {
548 LocationSummary* locations = instruction->GetLocations();
549 DCHECK(locations->InAt(0).Equals(locations->Out()));
550 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
551 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
552 switch (instruction->GetPackedType()) {
553 case DataType::Type::kUint16:
554 case DataType::Type::kInt16:
555 DCHECK_EQ(8u, instruction->GetVectorLength());
556 __ pmullw(dst, src);
557 break;
558 case DataType::Type::kInt32:
559 DCHECK_EQ(4u, instruction->GetVectorLength());
560 __ pmulld(dst, src);
561 break;
562 case DataType::Type::kFloat32:
563 DCHECK_EQ(4u, instruction->GetVectorLength());
564 __ mulps(dst, src);
565 break;
566 case DataType::Type::kFloat64:
567 DCHECK_EQ(2u, instruction->GetVectorLength());
568 __ mulpd(dst, src);
569 break;
570 default:
571 LOG(FATAL) << "Unsupported SIMD type";
572 UNREACHABLE();
573 }
574 }
575
VisitVecDiv(HVecDiv * instruction)576 void LocationsBuilderX86_64::VisitVecDiv(HVecDiv* instruction) {
577 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
578 }
579
VisitVecDiv(HVecDiv * instruction)580 void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) {
581 LocationSummary* locations = instruction->GetLocations();
582 DCHECK(locations->InAt(0).Equals(locations->Out()));
583 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
584 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
585 switch (instruction->GetPackedType()) {
586 case DataType::Type::kFloat32:
587 DCHECK_EQ(4u, instruction->GetVectorLength());
588 __ divps(dst, src);
589 break;
590 case DataType::Type::kFloat64:
591 DCHECK_EQ(2u, instruction->GetVectorLength());
592 __ divpd(dst, src);
593 break;
594 default:
595 LOG(FATAL) << "Unsupported SIMD type";
596 UNREACHABLE();
597 }
598 }
599
VisitVecMin(HVecMin * instruction)600 void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) {
601 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
602 }
603
VisitVecMin(HVecMin * instruction)604 void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) {
605 LocationSummary* locations = instruction->GetLocations();
606 DCHECK(locations->InAt(0).Equals(locations->Out()));
607 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
608 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
609 switch (instruction->GetPackedType()) {
610 case DataType::Type::kUint8:
611 DCHECK_EQ(16u, instruction->GetVectorLength());
612 __ pminub(dst, src);
613 break;
614 case DataType::Type::kInt8:
615 DCHECK_EQ(16u, instruction->GetVectorLength());
616 __ pminsb(dst, src);
617 break;
618 case DataType::Type::kUint16:
619 DCHECK_EQ(8u, instruction->GetVectorLength());
620 __ pminuw(dst, src);
621 break;
622 case DataType::Type::kInt16:
623 DCHECK_EQ(8u, instruction->GetVectorLength());
624 __ pminsw(dst, src);
625 break;
626 case DataType::Type::kUint32:
627 DCHECK_EQ(4u, instruction->GetVectorLength());
628 __ pminud(dst, src);
629 break;
630 case DataType::Type::kInt32:
631 DCHECK_EQ(4u, instruction->GetVectorLength());
632 __ pminsd(dst, src);
633 break;
634 // Next cases are sloppy wrt 0.0 vs -0.0.
635 case DataType::Type::kFloat32:
636 DCHECK_EQ(4u, instruction->GetVectorLength());
637 __ minps(dst, src);
638 break;
639 case DataType::Type::kFloat64:
640 DCHECK_EQ(2u, instruction->GetVectorLength());
641 __ minpd(dst, src);
642 break;
643 default:
644 LOG(FATAL) << "Unsupported SIMD type";
645 UNREACHABLE();
646 }
647 }
648
VisitVecMax(HVecMax * instruction)649 void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) {
650 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
651 }
652
VisitVecMax(HVecMax * instruction)653 void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) {
654 LocationSummary* locations = instruction->GetLocations();
655 DCHECK(locations->InAt(0).Equals(locations->Out()));
656 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
657 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
658 switch (instruction->GetPackedType()) {
659 case DataType::Type::kUint8:
660 DCHECK_EQ(16u, instruction->GetVectorLength());
661 __ pmaxub(dst, src);
662 break;
663 case DataType::Type::kInt8:
664 DCHECK_EQ(16u, instruction->GetVectorLength());
665 __ pmaxsb(dst, src);
666 break;
667 case DataType::Type::kUint16:
668 DCHECK_EQ(8u, instruction->GetVectorLength());
669 __ pmaxuw(dst, src);
670 break;
671 case DataType::Type::kInt16:
672 DCHECK_EQ(8u, instruction->GetVectorLength());
673 __ pmaxsw(dst, src);
674 break;
675 case DataType::Type::kUint32:
676 DCHECK_EQ(4u, instruction->GetVectorLength());
677 __ pmaxud(dst, src);
678 break;
679 case DataType::Type::kInt32:
680 DCHECK_EQ(4u, instruction->GetVectorLength());
681 __ pmaxsd(dst, src);
682 break;
683 // Next cases are sloppy wrt 0.0 vs -0.0.
684 case DataType::Type::kFloat32:
685 DCHECK_EQ(4u, instruction->GetVectorLength());
686 __ maxps(dst, src);
687 break;
688 case DataType::Type::kFloat64:
689 DCHECK_EQ(2u, instruction->GetVectorLength());
690 __ maxpd(dst, src);
691 break;
692 default:
693 LOG(FATAL) << "Unsupported SIMD type";
694 UNREACHABLE();
695 }
696 }
697
VisitVecAnd(HVecAnd * instruction)698 void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) {
699 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
700 }
701
VisitVecAnd(HVecAnd * instruction)702 void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) {
703 LocationSummary* locations = instruction->GetLocations();
704 DCHECK(locations->InAt(0).Equals(locations->Out()));
705 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
706 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
707 switch (instruction->GetPackedType()) {
708 case DataType::Type::kBool:
709 case DataType::Type::kUint8:
710 case DataType::Type::kInt8:
711 case DataType::Type::kUint16:
712 case DataType::Type::kInt16:
713 case DataType::Type::kInt32:
714 case DataType::Type::kInt64:
715 DCHECK_LE(2u, instruction->GetVectorLength());
716 DCHECK_LE(instruction->GetVectorLength(), 16u);
717 __ pand(dst, src);
718 break;
719 case DataType::Type::kFloat32:
720 DCHECK_EQ(4u, instruction->GetVectorLength());
721 __ andps(dst, src);
722 break;
723 case DataType::Type::kFloat64:
724 DCHECK_EQ(2u, instruction->GetVectorLength());
725 __ andpd(dst, src);
726 break;
727 default:
728 LOG(FATAL) << "Unsupported SIMD type";
729 UNREACHABLE();
730 }
731 }
732
VisitVecAndNot(HVecAndNot * instruction)733 void LocationsBuilderX86_64::VisitVecAndNot(HVecAndNot* instruction) {
734 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
735 }
736
VisitVecAndNot(HVecAndNot * instruction)737 void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) {
738 LocationSummary* locations = instruction->GetLocations();
739 DCHECK(locations->InAt(0).Equals(locations->Out()));
740 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
741 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
742 switch (instruction->GetPackedType()) {
743 case DataType::Type::kBool:
744 case DataType::Type::kUint8:
745 case DataType::Type::kInt8:
746 case DataType::Type::kUint16:
747 case DataType::Type::kInt16:
748 case DataType::Type::kInt32:
749 case DataType::Type::kInt64:
750 DCHECK_LE(2u, instruction->GetVectorLength());
751 DCHECK_LE(instruction->GetVectorLength(), 16u);
752 __ pandn(dst, src);
753 break;
754 case DataType::Type::kFloat32:
755 DCHECK_EQ(4u, instruction->GetVectorLength());
756 __ andnps(dst, src);
757 break;
758 case DataType::Type::kFloat64:
759 DCHECK_EQ(2u, instruction->GetVectorLength());
760 __ andnpd(dst, src);
761 break;
762 default:
763 LOG(FATAL) << "Unsupported SIMD type";
764 UNREACHABLE();
765 }
766 }
767
VisitVecOr(HVecOr * instruction)768 void LocationsBuilderX86_64::VisitVecOr(HVecOr* instruction) {
769 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
770 }
771
VisitVecOr(HVecOr * instruction)772 void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) {
773 LocationSummary* locations = instruction->GetLocations();
774 DCHECK(locations->InAt(0).Equals(locations->Out()));
775 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
776 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
777 switch (instruction->GetPackedType()) {
778 case DataType::Type::kBool:
779 case DataType::Type::kUint8:
780 case DataType::Type::kInt8:
781 case DataType::Type::kUint16:
782 case DataType::Type::kInt16:
783 case DataType::Type::kInt32:
784 case DataType::Type::kInt64:
785 DCHECK_LE(2u, instruction->GetVectorLength());
786 DCHECK_LE(instruction->GetVectorLength(), 16u);
787 __ por(dst, src);
788 break;
789 case DataType::Type::kFloat32:
790 DCHECK_EQ(4u, instruction->GetVectorLength());
791 __ orps(dst, src);
792 break;
793 case DataType::Type::kFloat64:
794 DCHECK_EQ(2u, instruction->GetVectorLength());
795 __ orpd(dst, src);
796 break;
797 default:
798 LOG(FATAL) << "Unsupported SIMD type";
799 UNREACHABLE();
800 }
801 }
802
VisitVecXor(HVecXor * instruction)803 void LocationsBuilderX86_64::VisitVecXor(HVecXor* instruction) {
804 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
805 }
806
VisitVecXor(HVecXor * instruction)807 void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) {
808 LocationSummary* locations = instruction->GetLocations();
809 DCHECK(locations->InAt(0).Equals(locations->Out()));
810 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
811 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
812 switch (instruction->GetPackedType()) {
813 case DataType::Type::kBool:
814 case DataType::Type::kUint8:
815 case DataType::Type::kInt8:
816 case DataType::Type::kUint16:
817 case DataType::Type::kInt16:
818 case DataType::Type::kInt32:
819 case DataType::Type::kInt64:
820 DCHECK_LE(2u, instruction->GetVectorLength());
821 DCHECK_LE(instruction->GetVectorLength(), 16u);
822 __ pxor(dst, src);
823 break;
824 case DataType::Type::kFloat32:
825 DCHECK_EQ(4u, instruction->GetVectorLength());
826 __ xorps(dst, src);
827 break;
828 case DataType::Type::kFloat64:
829 DCHECK_EQ(2u, instruction->GetVectorLength());
830 __ xorpd(dst, src);
831 break;
832 default:
833 LOG(FATAL) << "Unsupported SIMD type";
834 UNREACHABLE();
835 }
836 }
837
838 // Helper to set up locations for vector shift operations.
CreateVecShiftLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)839 static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
840 LocationSummary* locations = new (allocator) LocationSummary(instruction);
841 switch (instruction->GetPackedType()) {
842 case DataType::Type::kUint16:
843 case DataType::Type::kInt16:
844 case DataType::Type::kInt32:
845 case DataType::Type::kInt64:
846 locations->SetInAt(0, Location::RequiresFpuRegister());
847 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
848 locations->SetOut(Location::SameAsFirstInput());
849 break;
850 default:
851 LOG(FATAL) << "Unsupported SIMD type";
852 UNREACHABLE();
853 }
854 }
855
VisitVecShl(HVecShl * instruction)856 void LocationsBuilderX86_64::VisitVecShl(HVecShl* instruction) {
857 CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
858 }
859
VisitVecShl(HVecShl * instruction)860 void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) {
861 LocationSummary* locations = instruction->GetLocations();
862 DCHECK(locations->InAt(0).Equals(locations->Out()));
863 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
864 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
865 switch (instruction->GetPackedType()) {
866 case DataType::Type::kUint16:
867 case DataType::Type::kInt16:
868 DCHECK_EQ(8u, instruction->GetVectorLength());
869 __ psllw(dst, Immediate(static_cast<int8_t>(value)));
870 break;
871 case DataType::Type::kInt32:
872 DCHECK_EQ(4u, instruction->GetVectorLength());
873 __ pslld(dst, Immediate(static_cast<int8_t>(value)));
874 break;
875 case DataType::Type::kInt64:
876 DCHECK_EQ(2u, instruction->GetVectorLength());
877 __ psllq(dst, Immediate(static_cast<int8_t>(value)));
878 break;
879 default:
880 LOG(FATAL) << "Unsupported SIMD type";
881 UNREACHABLE();
882 }
883 }
884
VisitVecShr(HVecShr * instruction)885 void LocationsBuilderX86_64::VisitVecShr(HVecShr* instruction) {
886 CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
887 }
888
VisitVecShr(HVecShr * instruction)889 void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) {
890 LocationSummary* locations = instruction->GetLocations();
891 DCHECK(locations->InAt(0).Equals(locations->Out()));
892 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
893 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
894 switch (instruction->GetPackedType()) {
895 case DataType::Type::kUint16:
896 case DataType::Type::kInt16:
897 DCHECK_EQ(8u, instruction->GetVectorLength());
898 __ psraw(dst, Immediate(static_cast<int8_t>(value)));
899 break;
900 case DataType::Type::kInt32:
901 DCHECK_EQ(4u, instruction->GetVectorLength());
902 __ psrad(dst, Immediate(static_cast<int8_t>(value)));
903 break;
904 default:
905 LOG(FATAL) << "Unsupported SIMD type";
906 UNREACHABLE();
907 }
908 }
909
VisitVecUShr(HVecUShr * instruction)910 void LocationsBuilderX86_64::VisitVecUShr(HVecUShr* instruction) {
911 CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
912 }
913
VisitVecUShr(HVecUShr * instruction)914 void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) {
915 LocationSummary* locations = instruction->GetLocations();
916 DCHECK(locations->InAt(0).Equals(locations->Out()));
917 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
918 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
919 switch (instruction->GetPackedType()) {
920 case DataType::Type::kUint16:
921 case DataType::Type::kInt16:
922 DCHECK_EQ(8u, instruction->GetVectorLength());
923 __ psrlw(dst, Immediate(static_cast<int8_t>(value)));
924 break;
925 case DataType::Type::kInt32:
926 DCHECK_EQ(4u, instruction->GetVectorLength());
927 __ psrld(dst, Immediate(static_cast<int8_t>(value)));
928 break;
929 case DataType::Type::kInt64:
930 DCHECK_EQ(2u, instruction->GetVectorLength());
931 __ psrlq(dst, Immediate(static_cast<int8_t>(value)));
932 break;
933 default:
934 LOG(FATAL) << "Unsupported SIMD type";
935 UNREACHABLE();
936 }
937 }
938
VisitVecSetScalars(HVecSetScalars * instruction)939 void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
940 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
941
942 DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
943
944 HInstruction* input = instruction->InputAt(0);
945 bool is_zero = IsZeroBitPattern(input);
946
947 switch (instruction->GetPackedType()) {
948 case DataType::Type::kBool:
949 case DataType::Type::kUint8:
950 case DataType::Type::kInt8:
951 case DataType::Type::kUint16:
952 case DataType::Type::kInt16:
953 case DataType::Type::kInt32:
954 case DataType::Type::kInt64:
955 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
956 : Location::RequiresRegister());
957 locations->SetOut(Location::RequiresFpuRegister());
958 break;
959 case DataType::Type::kFloat32:
960 case DataType::Type::kFloat64:
961 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
962 : Location::RequiresFpuRegister());
963 locations->SetOut(Location::RequiresFpuRegister());
964 break;
965 default:
966 LOG(FATAL) << "Unsupported SIMD type";
967 UNREACHABLE();
968 }
969 }
970
VisitVecSetScalars(HVecSetScalars * instruction)971 void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
972 LocationSummary* locations = instruction->GetLocations();
973 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
974
975 DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
976
977 // Zero out all other elements first.
978 __ xorps(dst, dst);
979
980 // Shorthand for any type of zero.
981 if (IsZeroBitPattern(instruction->InputAt(0))) {
982 return;
983 }
984
985 // Set required elements.
986 switch (instruction->GetPackedType()) {
987 case DataType::Type::kBool:
988 case DataType::Type::kUint8:
989 case DataType::Type::kInt8:
990 case DataType::Type::kUint16:
991 case DataType::Type::kInt16: // TODO: up to here, and?
992 LOG(FATAL) << "Unsupported SIMD type";
993 UNREACHABLE();
994 case DataType::Type::kInt32:
995 DCHECK_EQ(4u, instruction->GetVectorLength());
996 __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
997 break;
998 case DataType::Type::kInt64:
999 DCHECK_EQ(2u, instruction->GetVectorLength());
1000 __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit
1001 break;
1002 case DataType::Type::kFloat32:
1003 DCHECK_EQ(4u, instruction->GetVectorLength());
1004 __ movss(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
1005 break;
1006 case DataType::Type::kFloat64:
1007 DCHECK_EQ(2u, instruction->GetVectorLength());
1008 __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
1009 break;
1010 default:
1011 LOG(FATAL) << "Unsupported SIMD type";
1012 UNREACHABLE();
1013 }
1014 }
1015
1016 // Helper to set up locations for vector accumulations.
CreateVecAccumLocations(ArenaAllocator * allocator,HVecOperation * instruction)1017 static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
1018 LocationSummary* locations = new (allocator) LocationSummary(instruction);
1019 switch (instruction->GetPackedType()) {
1020 case DataType::Type::kUint8:
1021 case DataType::Type::kInt8:
1022 case DataType::Type::kUint16:
1023 case DataType::Type::kInt16:
1024 case DataType::Type::kInt32:
1025 case DataType::Type::kInt64:
1026 locations->SetInAt(0, Location::RequiresFpuRegister());
1027 locations->SetInAt(1, Location::RequiresFpuRegister());
1028 locations->SetInAt(2, Location::RequiresFpuRegister());
1029 locations->SetOut(Location::SameAsFirstInput());
1030 break;
1031 default:
1032 LOG(FATAL) << "Unsupported SIMD type";
1033 UNREACHABLE();
1034 }
1035 }
1036
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1037 void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1038 CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1039 }
1040
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1041 void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1042 // TODO: pmaddwd?
1043 LOG(FATAL) << "No SIMD for " << instruction->GetId();
1044 }
1045
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1046 void LocationsBuilderX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1047 CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1048 }
1049
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1050 void InstructionCodeGeneratorX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1051 // TODO: psadbw for unsigned?
1052 LOG(FATAL) << "No SIMD for " << instruction->GetId();
1053 }
1054
1055 // Helper to set up locations for vector memory operations.
CreateVecMemLocations(ArenaAllocator * allocator,HVecMemoryOperation * instruction,bool is_load)1056 static void CreateVecMemLocations(ArenaAllocator* allocator,
1057 HVecMemoryOperation* instruction,
1058 bool is_load) {
1059 LocationSummary* locations = new (allocator) LocationSummary(instruction);
1060 switch (instruction->GetPackedType()) {
1061 case DataType::Type::kBool:
1062 case DataType::Type::kUint8:
1063 case DataType::Type::kInt8:
1064 case DataType::Type::kUint16:
1065 case DataType::Type::kInt16:
1066 case DataType::Type::kInt32:
1067 case DataType::Type::kInt64:
1068 case DataType::Type::kFloat32:
1069 case DataType::Type::kFloat64:
1070 locations->SetInAt(0, Location::RequiresRegister());
1071 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
1072 if (is_load) {
1073 locations->SetOut(Location::RequiresFpuRegister());
1074 } else {
1075 locations->SetInAt(2, Location::RequiresFpuRegister());
1076 }
1077 break;
1078 default:
1079 LOG(FATAL) << "Unsupported SIMD type";
1080 UNREACHABLE();
1081 }
1082 }
1083
1084 // Helper to construct address for vector memory operations.
VecAddress(LocationSummary * locations,size_t size,bool is_string_char_at)1085 static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
1086 Location base = locations->InAt(0);
1087 Location index = locations->InAt(1);
1088 ScaleFactor scale = TIMES_1;
1089 switch (size) {
1090 case 2: scale = TIMES_2; break;
1091 case 4: scale = TIMES_4; break;
1092 case 8: scale = TIMES_8; break;
1093 default: break;
1094 }
1095 // Incorporate the string or array offset in the address computation.
1096 uint32_t offset = is_string_char_at
1097 ? mirror::String::ValueOffset().Uint32Value()
1098 : mirror::Array::DataOffset(size).Uint32Value();
1099 return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset);
1100 }
1101
VisitVecLoad(HVecLoad * instruction)1102 void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) {
1103 CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
1104 // String load requires a temporary for the compressed load.
1105 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
1106 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
1107 }
1108 }
1109
VisitVecLoad(HVecLoad * instruction)1110 void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
1111 LocationSummary* locations = instruction->GetLocations();
1112 size_t size = DataType::Size(instruction->GetPackedType());
1113 Address address = VecAddress(locations, size, instruction->IsStringCharAt());
1114 XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
1115 bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
1116 switch (instruction->GetPackedType()) {
1117 case DataType::Type::kUint16:
1118 DCHECK_EQ(8u, instruction->GetVectorLength());
1119 // Special handling of compressed/uncompressed string load.
1120 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
1121 NearLabel done, not_compressed;
1122 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1123 // Test compression bit.
1124 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1125 "Expecting 0=compressed, 1=uncompressed");
1126 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1127 __ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1));
1128 __ j(kNotZero, ¬_compressed);
1129 // Zero extend 8 compressed bytes into 8 chars.
1130 __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
1131 __ pxor(tmp, tmp);
1132 __ punpcklbw(reg, tmp);
1133 __ jmp(&done);
1134 // Load 8 direct uncompressed chars.
1135 __ Bind(¬_compressed);
1136 is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
1137 __ Bind(&done);
1138 return;
1139 }
1140 FALLTHROUGH_INTENDED;
1141 case DataType::Type::kBool:
1142 case DataType::Type::kUint8:
1143 case DataType::Type::kInt8:
1144 case DataType::Type::kInt16:
1145 case DataType::Type::kInt32:
1146 case DataType::Type::kInt64:
1147 DCHECK_LE(2u, instruction->GetVectorLength());
1148 DCHECK_LE(instruction->GetVectorLength(), 16u);
1149 is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
1150 break;
1151 case DataType::Type::kFloat32:
1152 DCHECK_EQ(4u, instruction->GetVectorLength());
1153 is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
1154 break;
1155 case DataType::Type::kFloat64:
1156 DCHECK_EQ(2u, instruction->GetVectorLength());
1157 is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
1158 break;
1159 default:
1160 LOG(FATAL) << "Unsupported SIMD type";
1161 UNREACHABLE();
1162 }
1163 }
1164
VisitVecStore(HVecStore * instruction)1165 void LocationsBuilderX86_64::VisitVecStore(HVecStore* instruction) {
1166 CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
1167 }
1168
VisitVecStore(HVecStore * instruction)1169 void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) {
1170 LocationSummary* locations = instruction->GetLocations();
1171 size_t size = DataType::Size(instruction->GetPackedType());
1172 Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
1173 XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
1174 bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
1175 switch (instruction->GetPackedType()) {
1176 case DataType::Type::kBool:
1177 case DataType::Type::kUint8:
1178 case DataType::Type::kInt8:
1179 case DataType::Type::kUint16:
1180 case DataType::Type::kInt16:
1181 case DataType::Type::kInt32:
1182 case DataType::Type::kInt64:
1183 DCHECK_LE(2u, instruction->GetVectorLength());
1184 DCHECK_LE(instruction->GetVectorLength(), 16u);
1185 is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
1186 break;
1187 case DataType::Type::kFloat32:
1188 DCHECK_EQ(4u, instruction->GetVectorLength());
1189 is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
1190 break;
1191 case DataType::Type::kFloat64:
1192 DCHECK_EQ(2u, instruction->GetVectorLength());
1193 is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
1194 break;
1195 default:
1196 LOG(FATAL) << "Unsupported SIMD type";
1197 UNREACHABLE();
1198 }
1199 }
1200
1201 #undef __
1202
1203 } // namespace x86_64
1204 } // namespace art
1205