/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_x86.h"

#include "mirror/array-inl.h"
#include "mirror/string.h"

namespace art {
namespace x86 {

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(GetAssembler())->  // NOLINT

void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  HInstruction* input = instruction->InputAt(0);
  bool is_zero = IsZeroBitPattern(input);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt64:
      // Long needs extra temporary to load from the register pair.
      if (!is_zero) {
        locations->AddTemp(Location::RequiresFpuRegister());
      }
      FALLTHROUGH_INTENDED;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresFpuRegister());
      locations->SetOut(is_zero ? Location::RequiresFpuRegister()
                                : Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();

  bool cpu_has_avx = CpuHasAvxFeatureFlag();
  // Shorthand for any type of zero.
  if (IsZeroBitPattern(instruction->InputAt(0))) {
    cpu_has_avx ? __ vxorps(dst, dst, dst) : __ xorps(dst, dst);
    return;
  }

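  // Replicate the scalar into every lane: integral values are moved into the
  // low lane and then unpacked/shuffled across the vector, while floats are
  // already in an XMM register and only need a shuffle.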
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ movd(dst, locations->InAt(0).AsRegister<Register>());
      __ punpcklbw(dst, dst);
      __ punpcklwd(dst, dst);
      __ pshufd(dst, dst, Immediate(0));
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ movd(dst, locations->InAt(0).AsRegister<Register>());
      __ punpcklwd(dst, dst);
      __ pshufd(dst, dst, Immediate(0));
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ movd(dst, locations->InAt(0).AsRegister<Register>());
      __ pshufd(dst, dst, Immediate(0));
      break;
    case DataType::Type::kInt64: {
      DCHECK_EQ(2u, instruction->GetVectorLength());
      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
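      // Assemble the 64-bit value from the register pair in the low lane,
      // then duplicate that quadword into the upper lane.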
      __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
      __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
      __ punpckldq(dst, tmp);
      __ punpcklqdq(dst, dst);
      break;
    }
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      DCHECK(locations->InAt(0).Equals(locations->Out()));
      __ shufps(dst, dst, Immediate(0));
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      DCHECK(locations->InAt(0).Equals(locations->Out()));
      __ shufpd(dst, dst, Immediate(0));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt64:
      // Long needs extra temporary to store into the register pair.
      locations->AddTemp(Location::RequiresFpuRegister());
      FALLTHROUGH_INTENDED;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:  // TODO: up to here, and?
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
    case DataType::Type::kInt32:
      DCHECK_LE(4u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ movd(locations->Out().AsRegister<Register>(), src);
      break;
    case DataType::Type::kInt64: {
      DCHECK_EQ(2u, instruction->GetVectorLength());
      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
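      // Copy the low 32 bits out directly; shuffle element 1 down into the
      // low lane to copy out the high 32 bits.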
      __ movd(locations->Out().AsRegisterPairLow<Register>(), src);
      __ pshufd(tmp, src, Immediate(1));
      __ movd(locations->Out().AsRegisterPairHigh<Register>(), tmp);
      break;
    }
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 4u);
      DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecReduce(HVecReduce* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
  // Long reduction or min/max require a temporary.
  if (instruction->GetPackedType() == DataType::Type::kInt64 ||
      instruction->GetReductionKind() == HVecReduce::kMin ||
      instruction->GetReductionKind() == HVecReduce::kMax) {
    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
  }
}

void InstructionCodeGeneratorX86::VisitVecReduce(HVecReduce* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          __ movaps(dst, src);
          __ phaddd(dst, dst);
          __ phaddd(dst, dst);
          break;
        case HVecReduce::kMin:
        case HVecReduce::kMax:
          // Historical note: We've had a broken implementation here. b/117863065
          // Do not draw on the old code if we ever want to bring MIN/MAX reduction back.
          LOG(FATAL) << "Unsupported reduction type.";
      }
      break;
    case DataType::Type::kInt64: {
      DCHECK_EQ(2u, instruction->GetVectorLength());
      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
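          // Duplicate the upper 64-bit element into the lower lane of tmp and
          // add, leaving the sum in the low lane of dst.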
          __ movaps(tmp, src);
          __ movaps(dst, src);
          __ punpckhqdq(tmp, tmp);
          __ paddq(dst, tmp);
          break;
        case HVecReduce::kMin:
        case HVecReduce::kMax:
          LOG(FATAL) << "Unsupported reduction type.";
      }
      break;
    }
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  DataType::Type from = instruction->GetInputType();
  DataType::Type to = instruction->GetResultType();
  if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
    DCHECK_EQ(4u, instruction->GetVectorLength());
    __ cvtdq2ps(dst, src);
  } else {
    LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
  }
}

void LocationsBuilderX86::VisitVecNeg(HVecNeg* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ pxor(dst, dst);
      __ psubb(dst, src);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ pxor(dst, dst);
      __ psubw(dst, src);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pxor(dst, dst);
      __ psubd(dst, src);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ pxor(dst, dst);
      __ psubq(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ xorps(dst, dst);
      __ subps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ xorpd(dst, dst);
      __ subpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecAbs(HVecAbs* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
  // Integral-abs requires a temporary for the comparison.
  if (instruction->GetPackedType() == DataType::Type::kInt32) {
    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
  }
}

void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32: {
      DCHECK_EQ(4u, instruction->GetVectorLength());
      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
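      // Branchless abs: tmp = (x < 0) ? ~0 : 0, then abs(x) = (x ^ tmp) - tmp.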
      __ movaps(dst, src);
      __ pxor(tmp, tmp);
      __ pcmpgtd(tmp, dst);
      __ pxor(dst, tmp);
      __ psubd(dst, tmp);
      break;
    }
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
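      // Clear the sign bit: all-ones shifted right by one yields 0x7fffffff
      // per lane, which is then ANDed with the input.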
      __ pcmpeqb(dst, dst);  // all ones
      __ psrld(dst, Immediate(1));
      __ andps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ pcmpeqb(dst, dst);  // all ones
      __ psrlq(dst, Immediate(1));
      __ andpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecNot(HVecNot* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
  // Boolean-not requires a temporary to construct the 16 x one.
  if (instruction->GetPackedType() == DataType::Type::kBool) {
    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
  }
}

void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool: {  // special case boolean-not
      DCHECK_EQ(16u, instruction->GetVectorLength());
      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
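      // Booleans are stored as 0 or 1, so build a vector of 16 x 1
      // (0 - (-1)) and XOR it with the operand to flip 0 <-> 1.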
      __ pxor(dst, dst);
      __ pcmpeqb(tmp, tmp);  // all ones
      __ psubb(dst, tmp);  // 16 x one
      __ pxor(dst, src);
      break;
    }
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ pcmpeqb(dst, dst);  // all ones
      __ pxor(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pcmpeqb(dst, dst);  // all ones
      __ xorps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ pcmpeqb(dst, dst);  // all ones
      __ xorpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector binary operations.
static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

static void CreateVecTerOpLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecAdd(HVecAdd* instruction) {
  if (CpuHasAvxFeatureFlag()) {
    CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
  } else {
    CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
  }
}

void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) {
  bool cpu_has_avx = CpuHasAvxFeatureFlag();
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  DCHECK(cpu_has_avx || other_src == dst);
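  // With AVX, the three-operand VEX encodings are non-destructive; without it,
  // the SSE forms require the destination to alias the first input.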
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      cpu_has_avx ? __ vpaddb(dst, other_src, src) : __ paddb(dst, src);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      cpu_has_avx ? __ vpaddw(dst, other_src, src) : __ paddw(dst, src);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      cpu_has_avx ? __ vpaddd(dst, other_src, src) : __ paddd(dst, src);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      cpu_has_avx ? __ vpaddq(dst, other_src, src) : __ paddq(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      cpu_has_avx ? __ vaddps(dst, other_src, src) : __ addps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      cpu_has_avx ? __ vaddpd(dst, other_src, src) : __ addpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ paddusb(dst, src);
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ paddsb(dst, src);
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ paddusw(dst, src);
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ paddsw(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();

  DCHECK(instruction->IsRounded());

  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ pavgb(dst, src);
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ pavgw(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) {
  if (CpuHasAvxFeatureFlag()) {
    CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
  } else {
    CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
  }
}

void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) {
  bool cpu_has_avx = CpuHasAvxFeatureFlag();
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  DCHECK(cpu_has_avx || other_src == dst);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      cpu_has_avx ? __ vpsubb(dst, other_src, src) : __ psubb(dst, src);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      cpu_has_avx ? __ vpsubw(dst, other_src, src) : __ psubw(dst, src);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      cpu_has_avx ? __ vpsubd(dst, other_src, src) : __ psubd(dst, src);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      cpu_has_avx ? __ vpsubq(dst, other_src, src) : __ psubq(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      cpu_has_avx ? __ vsubps(dst, other_src, src) : __ subps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      cpu_has_avx ? __ vsubpd(dst, other_src, src) : __ subpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ psubusb(dst, src);
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ psubsb(dst, src);
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ psubusw(dst, src);
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ psubsw(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecMul(HVecMul* instruction) {
  if (CpuHasAvxFeatureFlag()) {
    CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
  } else {
    CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
  }
}

void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) {
  bool cpu_has_avx = CpuHasAvxFeatureFlag();
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  DCHECK(cpu_has_avx || other_src == dst);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      cpu_has_avx ? __ vpmullw(dst, other_src, src) : __ pmullw(dst, src);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      cpu_has_avx ? __ vpmulld(dst, other_src, src) : __ pmulld(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      cpu_has_avx ? __ vmulps(dst, other_src, src) : __ mulps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      cpu_has_avx ? __ vmulpd(dst, other_src, src) : __ mulpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecDiv(HVecDiv* instruction) {
  if (CpuHasAvxFeatureFlag()) {
    CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
  } else {
    CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
  }
}

void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) {
  bool cpu_has_avx = CpuHasAvxFeatureFlag();
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  DCHECK(cpu_has_avx || other_src == dst);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      cpu_has_avx ? __ vdivps(dst, other_src, src) : __ divps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      cpu_has_avx ? __ vdivpd(dst, other_src, src) : __ divpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecMin(HVecMin* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ pminub(dst, src);
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ pminsb(dst, src);
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ pminuw(dst, src);
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ pminsw(dst, src);
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pminud(dst, src);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pminsd(dst, src);
      break;
    // Next cases are sloppy wrt 0.0 vs -0.0.
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ minps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ minpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ pmaxub(dst, src);
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ pmaxsb(dst, src);
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ pmaxuw(dst, src);
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ pmaxsw(dst, src);
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pmaxud(dst, src);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pmaxsd(dst, src);
      break;
    // Next cases are sloppy wrt 0.0 vs -0.0.
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ maxps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ maxpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) {
  if (CpuHasAvxFeatureFlag()) {
    CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
  } else {
    CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
  }
}

void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) {
  bool cpu_has_avx = CpuHasAvxFeatureFlag();
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  DCHECK(cpu_has_avx || other_src == dst);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      cpu_has_avx ? __ vpand(dst, other_src, src) : __ pand(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      cpu_has_avx ? __ vandps(dst, other_src, src) : __ andps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      cpu_has_avx ? __ vandpd(dst, other_src, src) : __ andpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecAndNot(HVecAndNot* instruction) {
  if (CpuHasAvxFeatureFlag()) {
    CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
  } else {
    CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
  }
}

void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) {
  bool cpu_has_avx = CpuHasAvxFeatureFlag();
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  DCHECK(cpu_has_avx || other_src == dst);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      cpu_has_avx ? __ vpandn(dst, other_src, src) : __ pandn(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      cpu_has_avx ? __ vandnps(dst, other_src, src) : __ andnps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      cpu_has_avx ? __ vandnpd(dst, other_src, src) : __ andnpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecOr(HVecOr* instruction) {
  if (CpuHasAvxFeatureFlag()) {
    CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
  } else {
    CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
  }
}

void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) {
  bool cpu_has_avx = CpuHasAvxFeatureFlag();
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  DCHECK(cpu_has_avx || other_src == dst);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      cpu_has_avx ? __ vpor(dst, other_src, src) : __ por(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      cpu_has_avx ? __ vorps(dst, other_src, src) : __ orps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      cpu_has_avx ? __ vorpd(dst, other_src, src) : __ orpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecXor(HVecXor* instruction) {
  if (CpuHasAvxFeatureFlag()) {
    CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
  } else {
    CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
  }
}

void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) {
  bool cpu_has_avx = CpuHasAvxFeatureFlag();
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  DCHECK(cpu_has_avx || other_src == dst);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      cpu_has_avx ? __ vpxor(dst, other_src, src) : __ pxor(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      cpu_has_avx ? __ vxorps(dst, other_src, src) : __ xorps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      cpu_has_avx ? __ vxorpd(dst, other_src, src) : __ xorpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector shift operations.
static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecShl(HVecShl* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ psllw(dst, Immediate(static_cast<uint8_t>(value)));
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pslld(dst, Immediate(static_cast<uint8_t>(value)));
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ psllq(dst, Immediate(static_cast<uint8_t>(value)));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecShr(HVecShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ psraw(dst, Immediate(static_cast<uint8_t>(value)));
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ psrad(dst, Immediate(static_cast<uint8_t>(value)));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecUShr(HVecUShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ psrlw(dst, Immediate(static_cast<uint8_t>(value)));
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ psrld(dst, Immediate(static_cast<uint8_t>(value)));
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ psrlq(dst, Immediate(static_cast<uint8_t>(value)));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);

  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented

  HInstruction* input = instruction->InputAt(0);
  bool is_zero = IsZeroBitPattern(input);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt64:
      // Long needs extra temporary to load from register pairs.
      if (!is_zero) {
        locations->AddTemp(Location::RequiresFpuRegister());
      }
      FALLTHROUGH_INTENDED;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();

  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented

  // Zero out all other elements first.
  bool cpu_has_avx = CpuHasAvxFeatureFlag();
  cpu_has_avx ? __ vxorps(dst, dst, dst) : __ xorps(dst, dst);

  // Shorthand for any type of zero.
  if (IsZeroBitPattern(instruction->InputAt(0))) {
    return;
  }

  // Set required elements.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:  // TODO: up to here, and?
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ movd(dst, locations->InAt(0).AsRegister<Register>());
      break;
    case DataType::Type::kInt64: {
      DCHECK_EQ(2u, instruction->GetVectorLength());
      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
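      // Assemble the 64-bit value from the register pair into the low lane;
      // the upper lanes were already zeroed above.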
      __ xorps(tmp, tmp);
      __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
      __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
      __ punpckldq(dst, tmp);
      break;
    }
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ movss(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector accumulations.
static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetInAt(2, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  // TODO: pmaddwd?
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
}

void LocationsBuilderX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  // TODO: psadbw for unsigned?
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
}

void LocationsBuilderX86::VisitVecDotProd(HVecDotProd* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetInAt(2, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());
}

void InstructionCodeGeneratorX86::VisitVecDotProd(HVecDotProd* instruction) {
  bool cpu_has_avx = CpuHasAvxFeatureFlag();
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister acc = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister left = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister right = locations->InAt(2).AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32: {
      DCHECK_EQ(4u, instruction->GetVectorLength());
      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
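      // pmaddwd multiplies adjacent 16-bit elements and sums each pair into a
      // 32-bit lane; the partial products are then added into the accumulator.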
      if (!cpu_has_avx) {
        __ movaps(tmp, right);
        __ pmaddwd(tmp, left);
        __ paddd(acc, tmp);
      } else {
        __ vpmaddwd(tmp, left, right);
        __ vpaddd(acc, acc, tmp);
      }
      break;
    }
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector memory operations.
static void CreateVecMemLocations(ArenaAllocator* allocator,
                                  HVecMemoryOperation* instruction,
                                  bool is_load) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
      if (is_load) {
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(2, Location::RequiresFpuRegister());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to construct address for vector memory operations.
static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
  Location base = locations->InAt(0);
  Location index = locations->InAt(1);
  ScaleFactor scale = TIMES_1;
  switch (size) {
    case 2: scale = TIMES_2; break;
    case 4: scale = TIMES_4; break;
    case 8: scale = TIMES_8; break;
    default: break;
  }
  // Incorporate the string or array offset in the address computation.
  uint32_t offset = is_string_char_at
      ? mirror::String::ValueOffset().Uint32Value()
      : mirror::Array::DataOffset(size).Uint32Value();
  return CodeGeneratorX86::ArrayAddress(base.AsRegister<Register>(), index, scale, offset);
}

void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
  // String load requires a temporary for the compressed load.
  if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
  }
}

void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  Address address = VecAddress(locations, size, instruction->IsStringCharAt());
  XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
  bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
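  // Aligned 16-byte accesses can use the aligned move forms; anything else
  // falls back to the unaligned forms.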
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      // Special handling of compressed/uncompressed string load.
      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
        NearLabel done, not_compressed;
        XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
        // Test compression bit.
        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                      "Expecting 0=compressed, 1=uncompressed");
        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
        __ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1));
        __ j(kNotZero, &not_compressed);
        // Zero extend 8 compressed bytes into 8 chars.
        __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
        __ pxor(tmp, tmp);
        __ punpcklbw(reg, tmp);
        __ jmp(&done);
        // Load 8 direct uncompressed chars.
        __ Bind(&not_compressed);
        is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
        __ Bind(&done);
        return;
      }
      FALLTHROUGH_INTENDED;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86::VisitVecStore(HVecStore* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
}

void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
  XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
  bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

#undef __

}  // namespace x86
}  // namespace art