1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_x86_64.h"
18
19 #include "mirror/array-inl.h"
20 #include "mirror/string.h"
21
22 namespace art {
23 namespace x86_64 {
24
25 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
26 #define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT
27
VisitVecReplicateScalar(HVecReplicateScalar * instruction)28 void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
29 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
30 HInstruction* input = instruction->InputAt(0);
31 bool is_zero = IsZeroBitPattern(input);
32 switch (instruction->GetPackedType()) {
33 case DataType::Type::kBool:
34 case DataType::Type::kUint8:
35 case DataType::Type::kInt8:
36 case DataType::Type::kUint16:
37 case DataType::Type::kInt16:
38 case DataType::Type::kInt32:
39 case DataType::Type::kInt64:
40 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
41 : Location::RequiresRegister());
42 locations->SetOut(Location::RequiresFpuRegister());
43 break;
44 case DataType::Type::kFloat32:
45 case DataType::Type::kFloat64:
46 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
47 : Location::RequiresFpuRegister());
48 locations->SetOut(is_zero ? Location::RequiresFpuRegister()
49 : Location::SameAsFirstInput());
50 break;
51 default:
52 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
53 UNREACHABLE();
54 }
55 }
56
VisitVecReplicateScalar(HVecReplicateScalar * instruction)57 void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
58 LocationSummary* locations = instruction->GetLocations();
59 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
60
61 // Shorthand for any type of zero.
62 if (IsZeroBitPattern(instruction->InputAt(0))) {
63 __ xorps(dst, dst);
64 return;
65 }
66
67 switch (instruction->GetPackedType()) {
68 case DataType::Type::kBool:
69 case DataType::Type::kUint8:
70 case DataType::Type::kInt8:
71 DCHECK_EQ(16u, instruction->GetVectorLength());
72 __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
73 __ punpcklbw(dst, dst);
74 __ punpcklwd(dst, dst);
75 __ pshufd(dst, dst, Immediate(0));
76 break;
77 case DataType::Type::kUint16:
78 case DataType::Type::kInt16:
79 DCHECK_EQ(8u, instruction->GetVectorLength());
80 __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
81 __ punpcklwd(dst, dst);
82 __ pshufd(dst, dst, Immediate(0));
83 break;
84 case DataType::Type::kInt32:
85 DCHECK_EQ(4u, instruction->GetVectorLength());
86 __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
87 __ pshufd(dst, dst, Immediate(0));
88 break;
89 case DataType::Type::kInt64:
90 DCHECK_EQ(2u, instruction->GetVectorLength());
91 __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ true);
92 __ punpcklqdq(dst, dst);
93 break;
94 case DataType::Type::kFloat32:
95 DCHECK_EQ(4u, instruction->GetVectorLength());
96 DCHECK(locations->InAt(0).Equals(locations->Out()));
97 __ shufps(dst, dst, Immediate(0));
98 break;
99 case DataType::Type::kFloat64:
100 DCHECK_EQ(2u, instruction->GetVectorLength());
101 DCHECK(locations->InAt(0).Equals(locations->Out()));
102 __ shufpd(dst, dst, Immediate(0));
103 break;
104 default:
105 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
106 UNREACHABLE();
107 }
108 }
109
VisitVecExtractScalar(HVecExtractScalar * instruction)110 void LocationsBuilderX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
111 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
112 switch (instruction->GetPackedType()) {
113 case DataType::Type::kBool:
114 case DataType::Type::kUint8:
115 case DataType::Type::kInt8:
116 case DataType::Type::kUint16:
117 case DataType::Type::kInt16:
118 case DataType::Type::kInt32:
119 case DataType::Type::kInt64:
120 locations->SetInAt(0, Location::RequiresFpuRegister());
121 locations->SetOut(Location::RequiresRegister());
122 break;
123 case DataType::Type::kFloat32:
124 case DataType::Type::kFloat64:
125 locations->SetInAt(0, Location::RequiresFpuRegister());
126 locations->SetOut(Location::SameAsFirstInput());
127 break;
128 default:
129 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
130 UNREACHABLE();
131 }
132 }
133
VisitVecExtractScalar(HVecExtractScalar * instruction)134 void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
135 LocationSummary* locations = instruction->GetLocations();
136 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
137 switch (instruction->GetPackedType()) {
138 case DataType::Type::kBool:
139 case DataType::Type::kUint8:
140 case DataType::Type::kInt8:
141 case DataType::Type::kUint16:
142 case DataType::Type::kInt16: // TODO: up to here, and?
143 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
144 UNREACHABLE();
145 case DataType::Type::kInt32:
146 DCHECK_EQ(4u, instruction->GetVectorLength());
147 __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ false);
148 break;
149 case DataType::Type::kInt64:
150 DCHECK_EQ(2u, instruction->GetVectorLength());
151 __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ true);
152 break;
153 case DataType::Type::kFloat32:
154 case DataType::Type::kFloat64:
155 DCHECK_LE(2u, instruction->GetVectorLength());
156 DCHECK_LE(instruction->GetVectorLength(), 4u);
157 DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
158 break;
159 default:
160 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
161 UNREACHABLE();
162 }
163 }
164
165 // Helper to set up locations for vector unary operations.
CreateVecUnOpLocations(ArenaAllocator * allocator,HVecUnaryOperation * instruction)166 static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
167 LocationSummary* locations = new (allocator) LocationSummary(instruction);
168 switch (instruction->GetPackedType()) {
169 case DataType::Type::kBool:
170 case DataType::Type::kUint8:
171 case DataType::Type::kInt8:
172 case DataType::Type::kUint16:
173 case DataType::Type::kInt16:
174 case DataType::Type::kInt32:
175 case DataType::Type::kInt64:
176 case DataType::Type::kFloat32:
177 case DataType::Type::kFloat64:
178 locations->SetInAt(0, Location::RequiresFpuRegister());
179 locations->SetOut(Location::RequiresFpuRegister());
180 break;
181 default:
182 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
183 UNREACHABLE();
184 }
185 }
186
VisitVecReduce(HVecReduce * instruction)187 void LocationsBuilderX86_64::VisitVecReduce(HVecReduce* instruction) {
188 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
189 // Long reduction or min/max require a temporary.
190 if (instruction->GetPackedType() == DataType::Type::kInt64 ||
191 instruction->GetReductionKind() == HVecReduce::kMin ||
192 instruction->GetReductionKind() == HVecReduce::kMax) {
193 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
194 }
195 }
196
VisitVecReduce(HVecReduce * instruction)197 void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) {
198 LocationSummary* locations = instruction->GetLocations();
199 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
200 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
201 switch (instruction->GetPackedType()) {
202 case DataType::Type::kInt32:
203 DCHECK_EQ(4u, instruction->GetVectorLength());
204 switch (instruction->GetReductionKind()) {
205 case HVecReduce::kSum:
206 __ movaps(dst, src);
207 __ phaddd(dst, dst);
208 __ phaddd(dst, dst);
209 break;
210 case HVecReduce::kMin:
211 case HVecReduce::kMax:
212 // Historical note: We've had a broken implementation here. b/117863065
213 // Do not draw on the old code if we ever want to bring MIN/MAX reduction back.
214 LOG(FATAL) << "Unsupported reduction type.";
215 }
216 break;
217 case DataType::Type::kInt64: {
218 DCHECK_EQ(2u, instruction->GetVectorLength());
219 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
220 switch (instruction->GetReductionKind()) {
221 case HVecReduce::kSum:
222 __ movaps(tmp, src);
223 __ movaps(dst, src);
224 __ punpckhqdq(tmp, tmp);
225 __ paddq(dst, tmp);
226 break;
227 case HVecReduce::kMin:
228 case HVecReduce::kMax:
229 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
230 }
231 break;
232 }
233 default:
234 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
235 UNREACHABLE();
236 }
237 }
238
VisitVecCnv(HVecCnv * instruction)239 void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) {
240 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
241 }
242
VisitVecCnv(HVecCnv * instruction)243 void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) {
244 LocationSummary* locations = instruction->GetLocations();
245 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
246 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
247 DataType::Type from = instruction->GetInputType();
248 DataType::Type to = instruction->GetResultType();
249 if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
250 DCHECK_EQ(4u, instruction->GetVectorLength());
251 __ cvtdq2ps(dst, src);
252 } else {
253 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
254 }
255 }
256
VisitVecNeg(HVecNeg * instruction)257 void LocationsBuilderX86_64::VisitVecNeg(HVecNeg* instruction) {
258 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
259 }
260
VisitVecNeg(HVecNeg * instruction)261 void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) {
262 LocationSummary* locations = instruction->GetLocations();
263 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
264 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
265 switch (instruction->GetPackedType()) {
266 case DataType::Type::kUint8:
267 case DataType::Type::kInt8:
268 DCHECK_EQ(16u, instruction->GetVectorLength());
269 __ pxor(dst, dst);
270 __ psubb(dst, src);
271 break;
272 case DataType::Type::kUint16:
273 case DataType::Type::kInt16:
274 DCHECK_EQ(8u, instruction->GetVectorLength());
275 __ pxor(dst, dst);
276 __ psubw(dst, src);
277 break;
278 case DataType::Type::kInt32:
279 DCHECK_EQ(4u, instruction->GetVectorLength());
280 __ pxor(dst, dst);
281 __ psubd(dst, src);
282 break;
283 case DataType::Type::kInt64:
284 DCHECK_EQ(2u, instruction->GetVectorLength());
285 __ pxor(dst, dst);
286 __ psubq(dst, src);
287 break;
288 case DataType::Type::kFloat32:
289 DCHECK_EQ(4u, instruction->GetVectorLength());
290 __ xorps(dst, dst);
291 __ subps(dst, src);
292 break;
293 case DataType::Type::kFloat64:
294 DCHECK_EQ(2u, instruction->GetVectorLength());
295 __ xorpd(dst, dst);
296 __ subpd(dst, src);
297 break;
298 default:
299 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
300 UNREACHABLE();
301 }
302 }
303
VisitVecAbs(HVecAbs * instruction)304 void LocationsBuilderX86_64::VisitVecAbs(HVecAbs* instruction) {
305 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
306 // Integral-abs requires a temporary for the comparison.
307 if (instruction->GetPackedType() == DataType::Type::kInt32) {
308 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
309 }
310 }
311
VisitVecAbs(HVecAbs * instruction)312 void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) {
313 LocationSummary* locations = instruction->GetLocations();
314 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
315 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
316 switch (instruction->GetPackedType()) {
317 case DataType::Type::kInt32: {
318 DCHECK_EQ(4u, instruction->GetVectorLength());
319 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
320 __ movaps(dst, src);
321 __ pxor(tmp, tmp);
322 __ pcmpgtd(tmp, dst);
323 __ pxor(dst, tmp);
324 __ psubd(dst, tmp);
325 break;
326 }
327 case DataType::Type::kFloat32:
328 DCHECK_EQ(4u, instruction->GetVectorLength());
329 __ pcmpeqb(dst, dst); // all ones
330 __ psrld(dst, Immediate(1));
331 __ andps(dst, src);
332 break;
333 case DataType::Type::kFloat64:
334 DCHECK_EQ(2u, instruction->GetVectorLength());
335 __ pcmpeqb(dst, dst); // all ones
336 __ psrlq(dst, Immediate(1));
337 __ andpd(dst, src);
338 break;
339 default:
340 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
341 UNREACHABLE();
342 }
343 }
344
VisitVecNot(HVecNot * instruction)345 void LocationsBuilderX86_64::VisitVecNot(HVecNot* instruction) {
346 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
347 // Boolean-not requires a temporary to construct the 16 x one.
348 if (instruction->GetPackedType() == DataType::Type::kBool) {
349 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
350 }
351 }
352
VisitVecNot(HVecNot * instruction)353 void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) {
354 LocationSummary* locations = instruction->GetLocations();
355 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
356 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
357 switch (instruction->GetPackedType()) {
358 case DataType::Type::kBool: { // special case boolean-not
359 DCHECK_EQ(16u, instruction->GetVectorLength());
360 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
361 __ pxor(dst, dst);
362 __ pcmpeqb(tmp, tmp); // all ones
363 __ psubb(dst, tmp); // 16 x one
364 __ pxor(dst, src);
365 break;
366 }
367 case DataType::Type::kUint8:
368 case DataType::Type::kInt8:
369 case DataType::Type::kUint16:
370 case DataType::Type::kInt16:
371 case DataType::Type::kInt32:
372 case DataType::Type::kInt64:
373 DCHECK_LE(2u, instruction->GetVectorLength());
374 DCHECK_LE(instruction->GetVectorLength(), 16u);
375 __ pcmpeqb(dst, dst); // all ones
376 __ pxor(dst, src);
377 break;
378 case DataType::Type::kFloat32:
379 DCHECK_EQ(4u, instruction->GetVectorLength());
380 __ pcmpeqb(dst, dst); // all ones
381 __ xorps(dst, src);
382 break;
383 case DataType::Type::kFloat64:
384 DCHECK_EQ(2u, instruction->GetVectorLength());
385 __ pcmpeqb(dst, dst); // all ones
386 __ xorpd(dst, src);
387 break;
388 default:
389 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
390 UNREACHABLE();
391 }
392 }
393
394 // Helper to set up locations for vector binary operations.
CreateVecBinOpLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)395 static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
396 LocationSummary* locations = new (allocator) LocationSummary(instruction);
397 switch (instruction->GetPackedType()) {
398 case DataType::Type::kBool:
399 case DataType::Type::kUint8:
400 case DataType::Type::kInt8:
401 case DataType::Type::kUint16:
402 case DataType::Type::kInt16:
403 case DataType::Type::kInt32:
404 case DataType::Type::kInt64:
405 case DataType::Type::kFloat32:
406 case DataType::Type::kFloat64:
407 locations->SetInAt(0, Location::RequiresFpuRegister());
408 locations->SetInAt(1, Location::RequiresFpuRegister());
409 locations->SetOut(Location::SameAsFirstInput());
410 break;
411 default:
412 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
413 UNREACHABLE();
414 }
415 }
416
VisitVecAdd(HVecAdd * instruction)417 void LocationsBuilderX86_64::VisitVecAdd(HVecAdd* instruction) {
418 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
419 }
420
VisitVecAdd(HVecAdd * instruction)421 void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) {
422 LocationSummary* locations = instruction->GetLocations();
423 DCHECK(locations->InAt(0).Equals(locations->Out()));
424 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
425 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
426 switch (instruction->GetPackedType()) {
427 case DataType::Type::kUint8:
428 case DataType::Type::kInt8:
429 DCHECK_EQ(16u, instruction->GetVectorLength());
430 __ paddb(dst, src);
431 break;
432 case DataType::Type::kUint16:
433 case DataType::Type::kInt16:
434 DCHECK_EQ(8u, instruction->GetVectorLength());
435 __ paddw(dst, src);
436 break;
437 case DataType::Type::kInt32:
438 DCHECK_EQ(4u, instruction->GetVectorLength());
439 __ paddd(dst, src);
440 break;
441 case DataType::Type::kInt64:
442 DCHECK_EQ(2u, instruction->GetVectorLength());
443 __ paddq(dst, src);
444 break;
445 case DataType::Type::kFloat32:
446 DCHECK_EQ(4u, instruction->GetVectorLength());
447 __ addps(dst, src);
448 break;
449 case DataType::Type::kFloat64:
450 DCHECK_EQ(2u, instruction->GetVectorLength());
451 __ addpd(dst, src);
452 break;
453 default:
454 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
455 UNREACHABLE();
456 }
457 }
458
VisitVecSaturationAdd(HVecSaturationAdd * instruction)459 void LocationsBuilderX86_64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
460 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
461 }
462
VisitVecSaturationAdd(HVecSaturationAdd * instruction)463 void InstructionCodeGeneratorX86_64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
464 LocationSummary* locations = instruction->GetLocations();
465 DCHECK(locations->InAt(0).Equals(locations->Out()));
466 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
467 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
468 switch (instruction->GetPackedType()) {
469 case DataType::Type::kUint8:
470 DCHECK_EQ(16u, instruction->GetVectorLength());
471 __ paddusb(dst, src);
472 break;
473 case DataType::Type::kInt8:
474 DCHECK_EQ(16u, instruction->GetVectorLength());
475 __ paddsb(dst, src);
476 break;
477 case DataType::Type::kUint16:
478 DCHECK_EQ(8u, instruction->GetVectorLength());
479 __ paddusw(dst, src);
480 break;
481 case DataType::Type::kInt16:
482 DCHECK_EQ(8u, instruction->GetVectorLength());
483 __ paddsw(dst, src);
484 break;
485 default:
486 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
487 UNREACHABLE();
488 }
489 }
490
VisitVecHalvingAdd(HVecHalvingAdd * instruction)491 void LocationsBuilderX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
492 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
493 }
494
VisitVecHalvingAdd(HVecHalvingAdd * instruction)495 void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
496 LocationSummary* locations = instruction->GetLocations();
497 DCHECK(locations->InAt(0).Equals(locations->Out()));
498 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
499 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
500
501 DCHECK(instruction->IsRounded());
502
503 switch (instruction->GetPackedType()) {
504 case DataType::Type::kUint8:
505 DCHECK_EQ(16u, instruction->GetVectorLength());
506 __ pavgb(dst, src);
507 break;
508 case DataType::Type::kUint16:
509 DCHECK_EQ(8u, instruction->GetVectorLength());
510 __ pavgw(dst, src);
511 break;
512 default:
513 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
514 UNREACHABLE();
515 }
516 }
517
VisitVecSub(HVecSub * instruction)518 void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) {
519 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
520 }
521
VisitVecSub(HVecSub * instruction)522 void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) {
523 LocationSummary* locations = instruction->GetLocations();
524 DCHECK(locations->InAt(0).Equals(locations->Out()));
525 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
526 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
527 switch (instruction->GetPackedType()) {
528 case DataType::Type::kUint8:
529 case DataType::Type::kInt8:
530 DCHECK_EQ(16u, instruction->GetVectorLength());
531 __ psubb(dst, src);
532 break;
533 case DataType::Type::kUint16:
534 case DataType::Type::kInt16:
535 DCHECK_EQ(8u, instruction->GetVectorLength());
536 __ psubw(dst, src);
537 break;
538 case DataType::Type::kInt32:
539 DCHECK_EQ(4u, instruction->GetVectorLength());
540 __ psubd(dst, src);
541 break;
542 case DataType::Type::kInt64:
543 DCHECK_EQ(2u, instruction->GetVectorLength());
544 __ psubq(dst, src);
545 break;
546 case DataType::Type::kFloat32:
547 DCHECK_EQ(4u, instruction->GetVectorLength());
548 __ subps(dst, src);
549 break;
550 case DataType::Type::kFloat64:
551 DCHECK_EQ(2u, instruction->GetVectorLength());
552 __ subpd(dst, src);
553 break;
554 default:
555 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
556 UNREACHABLE();
557 }
558 }
559
VisitVecSaturationSub(HVecSaturationSub * instruction)560 void LocationsBuilderX86_64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
561 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
562 }
563
VisitVecSaturationSub(HVecSaturationSub * instruction)564 void InstructionCodeGeneratorX86_64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
565 LocationSummary* locations = instruction->GetLocations();
566 DCHECK(locations->InAt(0).Equals(locations->Out()));
567 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
568 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
569 switch (instruction->GetPackedType()) {
570 case DataType::Type::kUint8:
571 DCHECK_EQ(16u, instruction->GetVectorLength());
572 __ psubusb(dst, src);
573 break;
574 case DataType::Type::kInt8:
575 DCHECK_EQ(16u, instruction->GetVectorLength());
576 __ psubsb(dst, src);
577 break;
578 case DataType::Type::kUint16:
579 DCHECK_EQ(8u, instruction->GetVectorLength());
580 __ psubusw(dst, src);
581 break;
582 case DataType::Type::kInt16:
583 DCHECK_EQ(8u, instruction->GetVectorLength());
584 __ psubsw(dst, src);
585 break;
586 default:
587 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
588 UNREACHABLE();
589 }
590 }
591
VisitVecMul(HVecMul * instruction)592 void LocationsBuilderX86_64::VisitVecMul(HVecMul* instruction) {
593 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
594 }
595
VisitVecMul(HVecMul * instruction)596 void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) {
597 LocationSummary* locations = instruction->GetLocations();
598 DCHECK(locations->InAt(0).Equals(locations->Out()));
599 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
600 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
601 switch (instruction->GetPackedType()) {
602 case DataType::Type::kUint16:
603 case DataType::Type::kInt16:
604 DCHECK_EQ(8u, instruction->GetVectorLength());
605 __ pmullw(dst, src);
606 break;
607 case DataType::Type::kInt32:
608 DCHECK_EQ(4u, instruction->GetVectorLength());
609 __ pmulld(dst, src);
610 break;
611 case DataType::Type::kFloat32:
612 DCHECK_EQ(4u, instruction->GetVectorLength());
613 __ mulps(dst, src);
614 break;
615 case DataType::Type::kFloat64:
616 DCHECK_EQ(2u, instruction->GetVectorLength());
617 __ mulpd(dst, src);
618 break;
619 default:
620 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
621 UNREACHABLE();
622 }
623 }
624
VisitVecDiv(HVecDiv * instruction)625 void LocationsBuilderX86_64::VisitVecDiv(HVecDiv* instruction) {
626 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
627 }
628
VisitVecDiv(HVecDiv * instruction)629 void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) {
630 LocationSummary* locations = instruction->GetLocations();
631 DCHECK(locations->InAt(0).Equals(locations->Out()));
632 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
633 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
634 switch (instruction->GetPackedType()) {
635 case DataType::Type::kFloat32:
636 DCHECK_EQ(4u, instruction->GetVectorLength());
637 __ divps(dst, src);
638 break;
639 case DataType::Type::kFloat64:
640 DCHECK_EQ(2u, instruction->GetVectorLength());
641 __ divpd(dst, src);
642 break;
643 default:
644 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
645 UNREACHABLE();
646 }
647 }
648
VisitVecMin(HVecMin * instruction)649 void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) {
650 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
651 }
652
VisitVecMin(HVecMin * instruction)653 void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) {
654 LocationSummary* locations = instruction->GetLocations();
655 DCHECK(locations->InAt(0).Equals(locations->Out()));
656 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
657 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
658 switch (instruction->GetPackedType()) {
659 case DataType::Type::kUint8:
660 DCHECK_EQ(16u, instruction->GetVectorLength());
661 __ pminub(dst, src);
662 break;
663 case DataType::Type::kInt8:
664 DCHECK_EQ(16u, instruction->GetVectorLength());
665 __ pminsb(dst, src);
666 break;
667 case DataType::Type::kUint16:
668 DCHECK_EQ(8u, instruction->GetVectorLength());
669 __ pminuw(dst, src);
670 break;
671 case DataType::Type::kInt16:
672 DCHECK_EQ(8u, instruction->GetVectorLength());
673 __ pminsw(dst, src);
674 break;
675 case DataType::Type::kUint32:
676 DCHECK_EQ(4u, instruction->GetVectorLength());
677 __ pminud(dst, src);
678 break;
679 case DataType::Type::kInt32:
680 DCHECK_EQ(4u, instruction->GetVectorLength());
681 __ pminsd(dst, src);
682 break;
683 // Next cases are sloppy wrt 0.0 vs -0.0.
684 case DataType::Type::kFloat32:
685 DCHECK_EQ(4u, instruction->GetVectorLength());
686 __ minps(dst, src);
687 break;
688 case DataType::Type::kFloat64:
689 DCHECK_EQ(2u, instruction->GetVectorLength());
690 __ minpd(dst, src);
691 break;
692 default:
693 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
694 UNREACHABLE();
695 }
696 }
697
VisitVecMax(HVecMax * instruction)698 void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) {
699 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
700 }
701
VisitVecMax(HVecMax * instruction)702 void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) {
703 LocationSummary* locations = instruction->GetLocations();
704 DCHECK(locations->InAt(0).Equals(locations->Out()));
705 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
706 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
707 switch (instruction->GetPackedType()) {
708 case DataType::Type::kUint8:
709 DCHECK_EQ(16u, instruction->GetVectorLength());
710 __ pmaxub(dst, src);
711 break;
712 case DataType::Type::kInt8:
713 DCHECK_EQ(16u, instruction->GetVectorLength());
714 __ pmaxsb(dst, src);
715 break;
716 case DataType::Type::kUint16:
717 DCHECK_EQ(8u, instruction->GetVectorLength());
718 __ pmaxuw(dst, src);
719 break;
720 case DataType::Type::kInt16:
721 DCHECK_EQ(8u, instruction->GetVectorLength());
722 __ pmaxsw(dst, src);
723 break;
724 case DataType::Type::kUint32:
725 DCHECK_EQ(4u, instruction->GetVectorLength());
726 __ pmaxud(dst, src);
727 break;
728 case DataType::Type::kInt32:
729 DCHECK_EQ(4u, instruction->GetVectorLength());
730 __ pmaxsd(dst, src);
731 break;
732 // Next cases are sloppy wrt 0.0 vs -0.0.
733 case DataType::Type::kFloat32:
734 DCHECK_EQ(4u, instruction->GetVectorLength());
735 __ maxps(dst, src);
736 break;
737 case DataType::Type::kFloat64:
738 DCHECK_EQ(2u, instruction->GetVectorLength());
739 __ maxpd(dst, src);
740 break;
741 default:
742 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
743 UNREACHABLE();
744 }
745 }
746
VisitVecAnd(HVecAnd * instruction)747 void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) {
748 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
749 }
750
VisitVecAnd(HVecAnd * instruction)751 void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) {
752 LocationSummary* locations = instruction->GetLocations();
753 DCHECK(locations->InAt(0).Equals(locations->Out()));
754 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
755 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
756 switch (instruction->GetPackedType()) {
757 case DataType::Type::kBool:
758 case DataType::Type::kUint8:
759 case DataType::Type::kInt8:
760 case DataType::Type::kUint16:
761 case DataType::Type::kInt16:
762 case DataType::Type::kInt32:
763 case DataType::Type::kInt64:
764 DCHECK_LE(2u, instruction->GetVectorLength());
765 DCHECK_LE(instruction->GetVectorLength(), 16u);
766 __ pand(dst, src);
767 break;
768 case DataType::Type::kFloat32:
769 DCHECK_EQ(4u, instruction->GetVectorLength());
770 __ andps(dst, src);
771 break;
772 case DataType::Type::kFloat64:
773 DCHECK_EQ(2u, instruction->GetVectorLength());
774 __ andpd(dst, src);
775 break;
776 default:
777 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
778 UNREACHABLE();
779 }
780 }
781
VisitVecAndNot(HVecAndNot * instruction)782 void LocationsBuilderX86_64::VisitVecAndNot(HVecAndNot* instruction) {
783 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
784 }
785
VisitVecAndNot(HVecAndNot * instruction)786 void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) {
787 LocationSummary* locations = instruction->GetLocations();
788 DCHECK(locations->InAt(0).Equals(locations->Out()));
789 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
790 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
791 switch (instruction->GetPackedType()) {
792 case DataType::Type::kBool:
793 case DataType::Type::kUint8:
794 case DataType::Type::kInt8:
795 case DataType::Type::kUint16:
796 case DataType::Type::kInt16:
797 case DataType::Type::kInt32:
798 case DataType::Type::kInt64:
799 DCHECK_LE(2u, instruction->GetVectorLength());
800 DCHECK_LE(instruction->GetVectorLength(), 16u);
801 __ pandn(dst, src);
802 break;
803 case DataType::Type::kFloat32:
804 DCHECK_EQ(4u, instruction->GetVectorLength());
805 __ andnps(dst, src);
806 break;
807 case DataType::Type::kFloat64:
808 DCHECK_EQ(2u, instruction->GetVectorLength());
809 __ andnpd(dst, src);
810 break;
811 default:
812 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
813 UNREACHABLE();
814 }
815 }
816
VisitVecOr(HVecOr * instruction)817 void LocationsBuilderX86_64::VisitVecOr(HVecOr* instruction) {
818 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
819 }
820
VisitVecOr(HVecOr * instruction)821 void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) {
822 LocationSummary* locations = instruction->GetLocations();
823 DCHECK(locations->InAt(0).Equals(locations->Out()));
824 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
825 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
826 switch (instruction->GetPackedType()) {
827 case DataType::Type::kBool:
828 case DataType::Type::kUint8:
829 case DataType::Type::kInt8:
830 case DataType::Type::kUint16:
831 case DataType::Type::kInt16:
832 case DataType::Type::kInt32:
833 case DataType::Type::kInt64:
834 DCHECK_LE(2u, instruction->GetVectorLength());
835 DCHECK_LE(instruction->GetVectorLength(), 16u);
836 __ por(dst, src);
837 break;
838 case DataType::Type::kFloat32:
839 DCHECK_EQ(4u, instruction->GetVectorLength());
840 __ orps(dst, src);
841 break;
842 case DataType::Type::kFloat64:
843 DCHECK_EQ(2u, instruction->GetVectorLength());
844 __ orpd(dst, src);
845 break;
846 default:
847 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
848 UNREACHABLE();
849 }
850 }
851
VisitVecXor(HVecXor * instruction)852 void LocationsBuilderX86_64::VisitVecXor(HVecXor* instruction) {
853 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
854 }
855
VisitVecXor(HVecXor * instruction)856 void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) {
857 LocationSummary* locations = instruction->GetLocations();
858 DCHECK(locations->InAt(0).Equals(locations->Out()));
859 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
860 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
861 switch (instruction->GetPackedType()) {
862 case DataType::Type::kBool:
863 case DataType::Type::kUint8:
864 case DataType::Type::kInt8:
865 case DataType::Type::kUint16:
866 case DataType::Type::kInt16:
867 case DataType::Type::kInt32:
868 case DataType::Type::kInt64:
869 DCHECK_LE(2u, instruction->GetVectorLength());
870 DCHECK_LE(instruction->GetVectorLength(), 16u);
871 __ pxor(dst, src);
872 break;
873 case DataType::Type::kFloat32:
874 DCHECK_EQ(4u, instruction->GetVectorLength());
875 __ xorps(dst, src);
876 break;
877 case DataType::Type::kFloat64:
878 DCHECK_EQ(2u, instruction->GetVectorLength());
879 __ xorpd(dst, src);
880 break;
881 default:
882 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
883 UNREACHABLE();
884 }
885 }
886
887 // Helper to set up locations for vector shift operations.
CreateVecShiftLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)888 static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
889 LocationSummary* locations = new (allocator) LocationSummary(instruction);
890 switch (instruction->GetPackedType()) {
891 case DataType::Type::kUint16:
892 case DataType::Type::kInt16:
893 case DataType::Type::kInt32:
894 case DataType::Type::kInt64:
895 locations->SetInAt(0, Location::RequiresFpuRegister());
896 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
897 locations->SetOut(Location::SameAsFirstInput());
898 break;
899 default:
900 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
901 UNREACHABLE();
902 }
903 }
904
VisitVecShl(HVecShl * instruction)905 void LocationsBuilderX86_64::VisitVecShl(HVecShl* instruction) {
906 CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
907 }
908
VisitVecShl(HVecShl * instruction)909 void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) {
910 LocationSummary* locations = instruction->GetLocations();
911 DCHECK(locations->InAt(0).Equals(locations->Out()));
912 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
913 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
914 switch (instruction->GetPackedType()) {
915 case DataType::Type::kUint16:
916 case DataType::Type::kInt16:
917 DCHECK_EQ(8u, instruction->GetVectorLength());
918 __ psllw(dst, Immediate(static_cast<int8_t>(value)));
919 break;
920 case DataType::Type::kInt32:
921 DCHECK_EQ(4u, instruction->GetVectorLength());
922 __ pslld(dst, Immediate(static_cast<int8_t>(value)));
923 break;
924 case DataType::Type::kInt64:
925 DCHECK_EQ(2u, instruction->GetVectorLength());
926 __ psllq(dst, Immediate(static_cast<int8_t>(value)));
927 break;
928 default:
929 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
930 UNREACHABLE();
931 }
932 }
933
VisitVecShr(HVecShr * instruction)934 void LocationsBuilderX86_64::VisitVecShr(HVecShr* instruction) {
935 CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
936 }
937
VisitVecShr(HVecShr * instruction)938 void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) {
939 LocationSummary* locations = instruction->GetLocations();
940 DCHECK(locations->InAt(0).Equals(locations->Out()));
941 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
942 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
943 switch (instruction->GetPackedType()) {
944 case DataType::Type::kUint16:
945 case DataType::Type::kInt16:
946 DCHECK_EQ(8u, instruction->GetVectorLength());
947 __ psraw(dst, Immediate(static_cast<int8_t>(value)));
948 break;
949 case DataType::Type::kInt32:
950 DCHECK_EQ(4u, instruction->GetVectorLength());
951 __ psrad(dst, Immediate(static_cast<int8_t>(value)));
952 break;
953 default:
954 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
955 UNREACHABLE();
956 }
957 }
958
VisitVecUShr(HVecUShr * instruction)959 void LocationsBuilderX86_64::VisitVecUShr(HVecUShr* instruction) {
960 CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
961 }
962
VisitVecUShr(HVecUShr * instruction)963 void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) {
964 LocationSummary* locations = instruction->GetLocations();
965 DCHECK(locations->InAt(0).Equals(locations->Out()));
966 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
967 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
968 switch (instruction->GetPackedType()) {
969 case DataType::Type::kUint16:
970 case DataType::Type::kInt16:
971 DCHECK_EQ(8u, instruction->GetVectorLength());
972 __ psrlw(dst, Immediate(static_cast<int8_t>(value)));
973 break;
974 case DataType::Type::kInt32:
975 DCHECK_EQ(4u, instruction->GetVectorLength());
976 __ psrld(dst, Immediate(static_cast<int8_t>(value)));
977 break;
978 case DataType::Type::kInt64:
979 DCHECK_EQ(2u, instruction->GetVectorLength());
980 __ psrlq(dst, Immediate(static_cast<int8_t>(value)));
981 break;
982 default:
983 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
984 UNREACHABLE();
985 }
986 }
987
VisitVecSetScalars(HVecSetScalars * instruction)988 void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
989 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
990
991 DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
992
993 HInstruction* input = instruction->InputAt(0);
994 bool is_zero = IsZeroBitPattern(input);
995
996 switch (instruction->GetPackedType()) {
997 case DataType::Type::kBool:
998 case DataType::Type::kUint8:
999 case DataType::Type::kInt8:
1000 case DataType::Type::kUint16:
1001 case DataType::Type::kInt16:
1002 case DataType::Type::kInt32:
1003 case DataType::Type::kInt64:
1004 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
1005 : Location::RequiresRegister());
1006 locations->SetOut(Location::RequiresFpuRegister());
1007 break;
1008 case DataType::Type::kFloat32:
1009 case DataType::Type::kFloat64:
1010 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
1011 : Location::RequiresFpuRegister());
1012 locations->SetOut(Location::RequiresFpuRegister());
1013 break;
1014 default:
1015 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1016 UNREACHABLE();
1017 }
1018 }
1019
VisitVecSetScalars(HVecSetScalars * instruction)1020 void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
1021 LocationSummary* locations = instruction->GetLocations();
1022 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
1023
1024 DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
1025
1026 // Zero out all other elements first.
1027 __ xorps(dst, dst);
1028
1029 // Shorthand for any type of zero.
1030 if (IsZeroBitPattern(instruction->InputAt(0))) {
1031 return;
1032 }
1033
1034 // Set required elements.
1035 switch (instruction->GetPackedType()) {
1036 case DataType::Type::kBool:
1037 case DataType::Type::kUint8:
1038 case DataType::Type::kInt8:
1039 case DataType::Type::kUint16:
1040 case DataType::Type::kInt16: // TODO: up to here, and?
1041 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1042 UNREACHABLE();
1043 case DataType::Type::kInt32:
1044 DCHECK_EQ(4u, instruction->GetVectorLength());
1045 __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
1046 break;
1047 case DataType::Type::kInt64:
1048 DCHECK_EQ(2u, instruction->GetVectorLength());
1049 __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit
1050 break;
1051 case DataType::Type::kFloat32:
1052 DCHECK_EQ(4u, instruction->GetVectorLength());
1053 __ movss(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
1054 break;
1055 case DataType::Type::kFloat64:
1056 DCHECK_EQ(2u, instruction->GetVectorLength());
1057 __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
1058 break;
1059 default:
1060 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1061 UNREACHABLE();
1062 }
1063 }
1064
1065 // Helper to set up locations for vector accumulations.
CreateVecAccumLocations(ArenaAllocator * allocator,HVecOperation * instruction)1066 static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
1067 LocationSummary* locations = new (allocator) LocationSummary(instruction);
1068 switch (instruction->GetPackedType()) {
1069 case DataType::Type::kUint8:
1070 case DataType::Type::kInt8:
1071 case DataType::Type::kUint16:
1072 case DataType::Type::kInt16:
1073 case DataType::Type::kInt32:
1074 case DataType::Type::kInt64:
1075 locations->SetInAt(0, Location::RequiresFpuRegister());
1076 locations->SetInAt(1, Location::RequiresFpuRegister());
1077 locations->SetInAt(2, Location::RequiresFpuRegister());
1078 locations->SetOut(Location::SameAsFirstInput());
1079 break;
1080 default:
1081 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1082 UNREACHABLE();
1083 }
1084 }
1085
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1086 void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1087 CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1088 }
1089
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1090 void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1091 // TODO: pmaddwd?
1092 LOG(FATAL) << "No SIMD for " << instruction->GetId();
1093 }
1094
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1095 void LocationsBuilderX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1096 CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1097 }
1098
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1099 void InstructionCodeGeneratorX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1100 // TODO: psadbw for unsigned?
1101 LOG(FATAL) << "No SIMD for " << instruction->GetId();
1102 }
1103
VisitVecDotProd(HVecDotProd * instruction)1104 void LocationsBuilderX86_64::VisitVecDotProd(HVecDotProd* instruction) {
1105 LOG(FATAL) << "No SIMD for " << instruction->GetId();
1106 }
1107
VisitVecDotProd(HVecDotProd * instruction)1108 void InstructionCodeGeneratorX86_64::VisitVecDotProd(HVecDotProd* instruction) {
1109 LOG(FATAL) << "No SIMD for " << instruction->GetId();
1110 }
1111
1112 // Helper to set up locations for vector memory operations.
CreateVecMemLocations(ArenaAllocator * allocator,HVecMemoryOperation * instruction,bool is_load)1113 static void CreateVecMemLocations(ArenaAllocator* allocator,
1114 HVecMemoryOperation* instruction,
1115 bool is_load) {
1116 LocationSummary* locations = new (allocator) LocationSummary(instruction);
1117 switch (instruction->GetPackedType()) {
1118 case DataType::Type::kBool:
1119 case DataType::Type::kUint8:
1120 case DataType::Type::kInt8:
1121 case DataType::Type::kUint16:
1122 case DataType::Type::kInt16:
1123 case DataType::Type::kInt32:
1124 case DataType::Type::kInt64:
1125 case DataType::Type::kFloat32:
1126 case DataType::Type::kFloat64:
1127 locations->SetInAt(0, Location::RequiresRegister());
1128 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
1129 if (is_load) {
1130 locations->SetOut(Location::RequiresFpuRegister());
1131 } else {
1132 locations->SetInAt(2, Location::RequiresFpuRegister());
1133 }
1134 break;
1135 default:
1136 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1137 UNREACHABLE();
1138 }
1139 }
1140
1141 // Helper to construct address for vector memory operations.
VecAddress(LocationSummary * locations,size_t size,bool is_string_char_at)1142 static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
1143 Location base = locations->InAt(0);
1144 Location index = locations->InAt(1);
1145 ScaleFactor scale = TIMES_1;
1146 switch (size) {
1147 case 2: scale = TIMES_2; break;
1148 case 4: scale = TIMES_4; break;
1149 case 8: scale = TIMES_8; break;
1150 default: break;
1151 }
1152 // Incorporate the string or array offset in the address computation.
1153 uint32_t offset = is_string_char_at
1154 ? mirror::String::ValueOffset().Uint32Value()
1155 : mirror::Array::DataOffset(size).Uint32Value();
1156 return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset);
1157 }
1158
VisitVecLoad(HVecLoad * instruction)1159 void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) {
1160 CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
1161 // String load requires a temporary for the compressed load.
1162 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
1163 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
1164 }
1165 }
1166
VisitVecLoad(HVecLoad * instruction)1167 void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
1168 LocationSummary* locations = instruction->GetLocations();
1169 size_t size = DataType::Size(instruction->GetPackedType());
1170 Address address = VecAddress(locations, size, instruction->IsStringCharAt());
1171 XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
1172 bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
1173 switch (instruction->GetPackedType()) {
1174 case DataType::Type::kInt16: // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
1175 case DataType::Type::kUint16:
1176 DCHECK_EQ(8u, instruction->GetVectorLength());
1177 // Special handling of compressed/uncompressed string load.
1178 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
1179 NearLabel done, not_compressed;
1180 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1181 // Test compression bit.
1182 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1183 "Expecting 0=compressed, 1=uncompressed");
1184 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1185 __ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1));
1186 __ j(kNotZero, ¬_compressed);
1187 // Zero extend 8 compressed bytes into 8 chars.
1188 __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
1189 __ pxor(tmp, tmp);
1190 __ punpcklbw(reg, tmp);
1191 __ jmp(&done);
1192 // Load 8 direct uncompressed chars.
1193 __ Bind(¬_compressed);
1194 is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
1195 __ Bind(&done);
1196 return;
1197 }
1198 FALLTHROUGH_INTENDED;
1199 case DataType::Type::kBool:
1200 case DataType::Type::kUint8:
1201 case DataType::Type::kInt8:
1202 case DataType::Type::kInt32:
1203 case DataType::Type::kInt64:
1204 DCHECK_LE(2u, instruction->GetVectorLength());
1205 DCHECK_LE(instruction->GetVectorLength(), 16u);
1206 is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
1207 break;
1208 case DataType::Type::kFloat32:
1209 DCHECK_EQ(4u, instruction->GetVectorLength());
1210 is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
1211 break;
1212 case DataType::Type::kFloat64:
1213 DCHECK_EQ(2u, instruction->GetVectorLength());
1214 is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
1215 break;
1216 default:
1217 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1218 UNREACHABLE();
1219 }
1220 }
1221
VisitVecStore(HVecStore * instruction)1222 void LocationsBuilderX86_64::VisitVecStore(HVecStore* instruction) {
1223 CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
1224 }
1225
VisitVecStore(HVecStore * instruction)1226 void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) {
1227 LocationSummary* locations = instruction->GetLocations();
1228 size_t size = DataType::Size(instruction->GetPackedType());
1229 Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
1230 XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
1231 bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
1232 switch (instruction->GetPackedType()) {
1233 case DataType::Type::kBool:
1234 case DataType::Type::kUint8:
1235 case DataType::Type::kInt8:
1236 case DataType::Type::kUint16:
1237 case DataType::Type::kInt16:
1238 case DataType::Type::kInt32:
1239 case DataType::Type::kInt64:
1240 DCHECK_LE(2u, instruction->GetVectorLength());
1241 DCHECK_LE(instruction->GetVectorLength(), 16u);
1242 is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
1243 break;
1244 case DataType::Type::kFloat32:
1245 DCHECK_EQ(4u, instruction->GetVectorLength());
1246 is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
1247 break;
1248 case DataType::Type::kFloat64:
1249 DCHECK_EQ(2u, instruction->GetVectorLength());
1250 is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
1251 break;
1252 default:
1253 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1254 UNREACHABLE();
1255 }
1256 }
1257
1258 #undef __
1259
1260 } // namespace x86_64
1261 } // namespace art
1262