1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_x86.h"
18
19 #include "mirror/array-inl.h"
20 #include "mirror/string.h"
21
22 namespace art {
23 namespace x86 {
24
25 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
26 #define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT
27
VisitVecReplicateScalar(HVecReplicateScalar * instruction)28 void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
29 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
30 HInstruction* input = instruction->InputAt(0);
31 bool is_zero = IsZeroBitPattern(input);
32 switch (instruction->GetPackedType()) {
33 case DataType::Type::kInt64:
34 // Long needs extra temporary to load from the register pair.
35 if (!is_zero) {
36 locations->AddTemp(Location::RequiresFpuRegister());
37 }
38 FALLTHROUGH_INTENDED;
39 case DataType::Type::kBool:
40 case DataType::Type::kUint8:
41 case DataType::Type::kInt8:
42 case DataType::Type::kUint16:
43 case DataType::Type::kInt16:
44 case DataType::Type::kInt32:
45 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
46 : Location::RequiresRegister());
47 locations->SetOut(Location::RequiresFpuRegister());
48 break;
49 case DataType::Type::kFloat32:
50 case DataType::Type::kFloat64:
51 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
52 : Location::RequiresFpuRegister());
53 locations->SetOut(is_zero ? Location::RequiresFpuRegister()
54 : Location::SameAsFirstInput());
55 break;
56 default:
57 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
58 UNREACHABLE();
59 }
60 }
61
VisitVecReplicateScalar(HVecReplicateScalar * instruction)62 void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
63 LocationSummary* locations = instruction->GetLocations();
64 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
65
66 // Shorthand for any type of zero.
67 if (IsZeroBitPattern(instruction->InputAt(0))) {
68 __ xorps(dst, dst);
69 return;
70 }
71
72 switch (instruction->GetPackedType()) {
73 case DataType::Type::kBool:
74 case DataType::Type::kUint8:
75 case DataType::Type::kInt8:
76 DCHECK_EQ(16u, instruction->GetVectorLength());
77 __ movd(dst, locations->InAt(0).AsRegister<Register>());
78 __ punpcklbw(dst, dst);
79 __ punpcklwd(dst, dst);
80 __ pshufd(dst, dst, Immediate(0));
81 break;
82 case DataType::Type::kUint16:
83 case DataType::Type::kInt16:
84 DCHECK_EQ(8u, instruction->GetVectorLength());
85 __ movd(dst, locations->InAt(0).AsRegister<Register>());
86 __ punpcklwd(dst, dst);
87 __ pshufd(dst, dst, Immediate(0));
88 break;
89 case DataType::Type::kInt32:
90 DCHECK_EQ(4u, instruction->GetVectorLength());
91 __ movd(dst, locations->InAt(0).AsRegister<Register>());
92 __ pshufd(dst, dst, Immediate(0));
93 break;
94 case DataType::Type::kInt64: {
95 DCHECK_EQ(2u, instruction->GetVectorLength());
96 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
97 __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
98 __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
99 __ punpckldq(dst, tmp);
100 __ punpcklqdq(dst, dst);
101 break;
102 }
103 case DataType::Type::kFloat32:
104 DCHECK_EQ(4u, instruction->GetVectorLength());
105 DCHECK(locations->InAt(0).Equals(locations->Out()));
106 __ shufps(dst, dst, Immediate(0));
107 break;
108 case DataType::Type::kFloat64:
109 DCHECK_EQ(2u, instruction->GetVectorLength());
110 DCHECK(locations->InAt(0).Equals(locations->Out()));
111 __ shufpd(dst, dst, Immediate(0));
112 break;
113 default:
114 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
115 UNREACHABLE();
116 }
117 }
118
VisitVecExtractScalar(HVecExtractScalar * instruction)119 void LocationsBuilderX86::VisitVecExtractScalar(HVecExtractScalar* instruction) {
120 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
121 switch (instruction->GetPackedType()) {
122 case DataType::Type::kInt64:
123 // Long needs extra temporary to store into the register pair.
124 locations->AddTemp(Location::RequiresFpuRegister());
125 FALLTHROUGH_INTENDED;
126 case DataType::Type::kBool:
127 case DataType::Type::kUint8:
128 case DataType::Type::kInt8:
129 case DataType::Type::kUint16:
130 case DataType::Type::kInt16:
131 case DataType::Type::kInt32:
132 locations->SetInAt(0, Location::RequiresFpuRegister());
133 locations->SetOut(Location::RequiresRegister());
134 break;
135 case DataType::Type::kFloat32:
136 case DataType::Type::kFloat64:
137 locations->SetInAt(0, Location::RequiresFpuRegister());
138 locations->SetOut(Location::SameAsFirstInput());
139 break;
140 default:
141 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
142 UNREACHABLE();
143 }
144 }
145
VisitVecExtractScalar(HVecExtractScalar * instruction)146 void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instruction) {
147 LocationSummary* locations = instruction->GetLocations();
148 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
149 switch (instruction->GetPackedType()) {
150 case DataType::Type::kBool:
151 case DataType::Type::kUint8:
152 case DataType::Type::kInt8:
153 case DataType::Type::kUint16:
154 case DataType::Type::kInt16: // TODO: up to here, and?
155 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
156 UNREACHABLE();
157 case DataType::Type::kInt32:
158 DCHECK_LE(4u, instruction->GetVectorLength());
159 DCHECK_LE(instruction->GetVectorLength(), 16u);
160 __ movd(locations->Out().AsRegister<Register>(), src);
161 break;
162 case DataType::Type::kInt64: {
163 DCHECK_EQ(2u, instruction->GetVectorLength());
164 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
165 __ movd(locations->Out().AsRegisterPairLow<Register>(), src);
166 __ pshufd(tmp, src, Immediate(1));
167 __ movd(locations->Out().AsRegisterPairHigh<Register>(), tmp);
168 break;
169 }
170 case DataType::Type::kFloat32:
171 case DataType::Type::kFloat64:
172 DCHECK_LE(2u, instruction->GetVectorLength());
173 DCHECK_LE(instruction->GetVectorLength(), 4u);
174 DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
175 break;
176 default:
177 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
178 UNREACHABLE();
179 }
180 }
181
182 // Helper to set up locations for vector unary operations.
CreateVecUnOpLocations(ArenaAllocator * allocator,HVecUnaryOperation * instruction)183 static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
184 LocationSummary* locations = new (allocator) LocationSummary(instruction);
185 switch (instruction->GetPackedType()) {
186 case DataType::Type::kBool:
187 case DataType::Type::kUint8:
188 case DataType::Type::kInt8:
189 case DataType::Type::kUint16:
190 case DataType::Type::kInt16:
191 case DataType::Type::kInt32:
192 case DataType::Type::kInt64:
193 case DataType::Type::kFloat32:
194 case DataType::Type::kFloat64:
195 locations->SetInAt(0, Location::RequiresFpuRegister());
196 locations->SetOut(Location::RequiresFpuRegister());
197 break;
198 default:
199 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
200 UNREACHABLE();
201 }
202 }
203
VisitVecReduce(HVecReduce * instruction)204 void LocationsBuilderX86::VisitVecReduce(HVecReduce* instruction) {
205 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
206 // Long reduction or min/max require a temporary.
207 if (instruction->GetPackedType() == DataType::Type::kInt64 ||
208 instruction->GetReductionKind() == HVecReduce::kMin ||
209 instruction->GetReductionKind() == HVecReduce::kMax) {
210 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
211 }
212 }
213
VisitVecReduce(HVecReduce * instruction)214 void InstructionCodeGeneratorX86::VisitVecReduce(HVecReduce* instruction) {
215 LocationSummary* locations = instruction->GetLocations();
216 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
217 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
218 switch (instruction->GetPackedType()) {
219 case DataType::Type::kInt32:
220 DCHECK_EQ(4u, instruction->GetVectorLength());
221 switch (instruction->GetReductionKind()) {
222 case HVecReduce::kSum:
223 __ movaps(dst, src);
224 __ phaddd(dst, dst);
225 __ phaddd(dst, dst);
226 break;
227 case HVecReduce::kMin:
228 case HVecReduce::kMax:
229 // Historical note: We've had a broken implementation here. b/117863065
230 // Do not draw on the old code if we ever want to bring MIN/MAX reduction back.
231 LOG(FATAL) << "Unsupported reduction type.";
232 }
233 break;
234 case DataType::Type::kInt64: {
235 DCHECK_EQ(2u, instruction->GetVectorLength());
236 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
237 switch (instruction->GetReductionKind()) {
238 case HVecReduce::kSum:
239 __ movaps(tmp, src);
240 __ movaps(dst, src);
241 __ punpckhqdq(tmp, tmp);
242 __ paddq(dst, tmp);
243 break;
244 case HVecReduce::kMin:
245 case HVecReduce::kMax:
246 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
247 }
248 break;
249 }
250 default:
251 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
252 UNREACHABLE();
253 }
254 }
255
VisitVecCnv(HVecCnv * instruction)256 void LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) {
257 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
258 }
259
VisitVecCnv(HVecCnv * instruction)260 void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) {
261 LocationSummary* locations = instruction->GetLocations();
262 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
263 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
264 DataType::Type from = instruction->GetInputType();
265 DataType::Type to = instruction->GetResultType();
266 if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
267 DCHECK_EQ(4u, instruction->GetVectorLength());
268 __ cvtdq2ps(dst, src);
269 } else {
270 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
271 }
272 }
273
VisitVecNeg(HVecNeg * instruction)274 void LocationsBuilderX86::VisitVecNeg(HVecNeg* instruction) {
275 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
276 }
277
VisitVecNeg(HVecNeg * instruction)278 void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) {
279 LocationSummary* locations = instruction->GetLocations();
280 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
281 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
282 switch (instruction->GetPackedType()) {
283 case DataType::Type::kUint8:
284 case DataType::Type::kInt8:
285 DCHECK_EQ(16u, instruction->GetVectorLength());
286 __ pxor(dst, dst);
287 __ psubb(dst, src);
288 break;
289 case DataType::Type::kUint16:
290 case DataType::Type::kInt16:
291 DCHECK_EQ(8u, instruction->GetVectorLength());
292 __ pxor(dst, dst);
293 __ psubw(dst, src);
294 break;
295 case DataType::Type::kInt32:
296 DCHECK_EQ(4u, instruction->GetVectorLength());
297 __ pxor(dst, dst);
298 __ psubd(dst, src);
299 break;
300 case DataType::Type::kInt64:
301 DCHECK_EQ(2u, instruction->GetVectorLength());
302 __ pxor(dst, dst);
303 __ psubq(dst, src);
304 break;
305 case DataType::Type::kFloat32:
306 DCHECK_EQ(4u, instruction->GetVectorLength());
307 __ xorps(dst, dst);
308 __ subps(dst, src);
309 break;
310 case DataType::Type::kFloat64:
311 DCHECK_EQ(2u, instruction->GetVectorLength());
312 __ xorpd(dst, dst);
313 __ subpd(dst, src);
314 break;
315 default:
316 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
317 UNREACHABLE();
318 }
319 }
320
VisitVecAbs(HVecAbs * instruction)321 void LocationsBuilderX86::VisitVecAbs(HVecAbs* instruction) {
322 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
323 // Integral-abs requires a temporary for the comparison.
324 if (instruction->GetPackedType() == DataType::Type::kInt32) {
325 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
326 }
327 }
328
VisitVecAbs(HVecAbs * instruction)329 void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) {
330 LocationSummary* locations = instruction->GetLocations();
331 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
332 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
333 switch (instruction->GetPackedType()) {
334 case DataType::Type::kInt32: {
335 DCHECK_EQ(4u, instruction->GetVectorLength());
336 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
337 __ movaps(dst, src);
338 __ pxor(tmp, tmp);
339 __ pcmpgtd(tmp, dst);
340 __ pxor(dst, tmp);
341 __ psubd(dst, tmp);
342 break;
343 }
344 case DataType::Type::kFloat32:
345 DCHECK_EQ(4u, instruction->GetVectorLength());
346 __ pcmpeqb(dst, dst); // all ones
347 __ psrld(dst, Immediate(1));
348 __ andps(dst, src);
349 break;
350 case DataType::Type::kFloat64:
351 DCHECK_EQ(2u, instruction->GetVectorLength());
352 __ pcmpeqb(dst, dst); // all ones
353 __ psrlq(dst, Immediate(1));
354 __ andpd(dst, src);
355 break;
356 default:
357 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
358 UNREACHABLE();
359 }
360 }
361
VisitVecNot(HVecNot * instruction)362 void LocationsBuilderX86::VisitVecNot(HVecNot* instruction) {
363 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
364 // Boolean-not requires a temporary to construct the 16 x one.
365 if (instruction->GetPackedType() == DataType::Type::kBool) {
366 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
367 }
368 }
369
VisitVecNot(HVecNot * instruction)370 void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) {
371 LocationSummary* locations = instruction->GetLocations();
372 XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
373 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
374 switch (instruction->GetPackedType()) {
375 case DataType::Type::kBool: { // special case boolean-not
376 DCHECK_EQ(16u, instruction->GetVectorLength());
377 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
378 __ pxor(dst, dst);
379 __ pcmpeqb(tmp, tmp); // all ones
380 __ psubb(dst, tmp); // 16 x one
381 __ pxor(dst, src);
382 break;
383 }
384 case DataType::Type::kUint8:
385 case DataType::Type::kInt8:
386 case DataType::Type::kUint16:
387 case DataType::Type::kInt16:
388 case DataType::Type::kInt32:
389 case DataType::Type::kInt64:
390 DCHECK_LE(2u, instruction->GetVectorLength());
391 DCHECK_LE(instruction->GetVectorLength(), 16u);
392 __ pcmpeqb(dst, dst); // all ones
393 __ pxor(dst, src);
394 break;
395 case DataType::Type::kFloat32:
396 DCHECK_EQ(4u, instruction->GetVectorLength());
397 __ pcmpeqb(dst, dst); // all ones
398 __ xorps(dst, src);
399 break;
400 case DataType::Type::kFloat64:
401 DCHECK_EQ(2u, instruction->GetVectorLength());
402 __ pcmpeqb(dst, dst); // all ones
403 __ xorpd(dst, src);
404 break;
405 default:
406 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
407 UNREACHABLE();
408 }
409 }
410
411 // Helper to set up locations for vector binary operations.
CreateVecBinOpLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)412 static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
413 LocationSummary* locations = new (allocator) LocationSummary(instruction);
414 switch (instruction->GetPackedType()) {
415 case DataType::Type::kBool:
416 case DataType::Type::kUint8:
417 case DataType::Type::kInt8:
418 case DataType::Type::kUint16:
419 case DataType::Type::kInt16:
420 case DataType::Type::kInt32:
421 case DataType::Type::kInt64:
422 case DataType::Type::kFloat32:
423 case DataType::Type::kFloat64:
424 locations->SetInAt(0, Location::RequiresFpuRegister());
425 locations->SetInAt(1, Location::RequiresFpuRegister());
426 locations->SetOut(Location::SameAsFirstInput());
427 break;
428 default:
429 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
430 UNREACHABLE();
431 }
432 }
433
VisitVecAdd(HVecAdd * instruction)434 void LocationsBuilderX86::VisitVecAdd(HVecAdd* instruction) {
435 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
436 }
437
VisitVecAdd(HVecAdd * instruction)438 void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) {
439 LocationSummary* locations = instruction->GetLocations();
440 DCHECK(locations->InAt(0).Equals(locations->Out()));
441 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
442 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
443 switch (instruction->GetPackedType()) {
444 case DataType::Type::kUint8:
445 case DataType::Type::kInt8:
446 DCHECK_EQ(16u, instruction->GetVectorLength());
447 __ paddb(dst, src);
448 break;
449 case DataType::Type::kUint16:
450 case DataType::Type::kInt16:
451 DCHECK_EQ(8u, instruction->GetVectorLength());
452 __ paddw(dst, src);
453 break;
454 case DataType::Type::kInt32:
455 DCHECK_EQ(4u, instruction->GetVectorLength());
456 __ paddd(dst, src);
457 break;
458 case DataType::Type::kInt64:
459 DCHECK_EQ(2u, instruction->GetVectorLength());
460 __ paddq(dst, src);
461 break;
462 case DataType::Type::kFloat32:
463 DCHECK_EQ(4u, instruction->GetVectorLength());
464 __ addps(dst, src);
465 break;
466 case DataType::Type::kFloat64:
467 DCHECK_EQ(2u, instruction->GetVectorLength());
468 __ addpd(dst, src);
469 break;
470 default:
471 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
472 UNREACHABLE();
473 }
474 }
475
VisitVecSaturationAdd(HVecSaturationAdd * instruction)476 void LocationsBuilderX86::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
477 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
478 }
479
VisitVecSaturationAdd(HVecSaturationAdd * instruction)480 void InstructionCodeGeneratorX86::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
481 LocationSummary* locations = instruction->GetLocations();
482 DCHECK(locations->InAt(0).Equals(locations->Out()));
483 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
484 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
485 switch (instruction->GetPackedType()) {
486 case DataType::Type::kUint8:
487 DCHECK_EQ(16u, instruction->GetVectorLength());
488 __ paddusb(dst, src);
489 break;
490 case DataType::Type::kInt8:
491 DCHECK_EQ(16u, instruction->GetVectorLength());
492 __ paddsb(dst, src);
493 break;
494 case DataType::Type::kUint16:
495 DCHECK_EQ(8u, instruction->GetVectorLength());
496 __ paddusw(dst, src);
497 break;
498 case DataType::Type::kInt16:
499 DCHECK_EQ(8u, instruction->GetVectorLength());
500 __ paddsw(dst, src);
501 break;
502 default:
503 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
504 UNREACHABLE();
505 }
506 }
507
VisitVecHalvingAdd(HVecHalvingAdd * instruction)508 void LocationsBuilderX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
509 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
510 }
511
VisitVecHalvingAdd(HVecHalvingAdd * instruction)512 void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
513 LocationSummary* locations = instruction->GetLocations();
514 DCHECK(locations->InAt(0).Equals(locations->Out()));
515 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
516 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
517
518 DCHECK(instruction->IsRounded());
519
520 switch (instruction->GetPackedType()) {
521 case DataType::Type::kUint8:
522 DCHECK_EQ(16u, instruction->GetVectorLength());
523 __ pavgb(dst, src);
524 break;
525 case DataType::Type::kUint16:
526 DCHECK_EQ(8u, instruction->GetVectorLength());
527 __ pavgw(dst, src);
528 break;
529 default:
530 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
531 UNREACHABLE();
532 }
533 }
534
VisitVecSub(HVecSub * instruction)535 void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) {
536 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
537 }
538
VisitVecSub(HVecSub * instruction)539 void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) {
540 LocationSummary* locations = instruction->GetLocations();
541 DCHECK(locations->InAt(0).Equals(locations->Out()));
542 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
543 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
544 switch (instruction->GetPackedType()) {
545 case DataType::Type::kUint8:
546 case DataType::Type::kInt8:
547 DCHECK_EQ(16u, instruction->GetVectorLength());
548 __ psubb(dst, src);
549 break;
550 case DataType::Type::kUint16:
551 case DataType::Type::kInt16:
552 DCHECK_EQ(8u, instruction->GetVectorLength());
553 __ psubw(dst, src);
554 break;
555 case DataType::Type::kInt32:
556 DCHECK_EQ(4u, instruction->GetVectorLength());
557 __ psubd(dst, src);
558 break;
559 case DataType::Type::kInt64:
560 DCHECK_EQ(2u, instruction->GetVectorLength());
561 __ psubq(dst, src);
562 break;
563 case DataType::Type::kFloat32:
564 DCHECK_EQ(4u, instruction->GetVectorLength());
565 __ subps(dst, src);
566 break;
567 case DataType::Type::kFloat64:
568 DCHECK_EQ(2u, instruction->GetVectorLength());
569 __ subpd(dst, src);
570 break;
571 default:
572 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
573 UNREACHABLE();
574 }
575 }
576
VisitVecSaturationSub(HVecSaturationSub * instruction)577 void LocationsBuilderX86::VisitVecSaturationSub(HVecSaturationSub* instruction) {
578 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
579 }
580
VisitVecSaturationSub(HVecSaturationSub * instruction)581 void InstructionCodeGeneratorX86::VisitVecSaturationSub(HVecSaturationSub* instruction) {
582 LocationSummary* locations = instruction->GetLocations();
583 DCHECK(locations->InAt(0).Equals(locations->Out()));
584 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
585 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
586 switch (instruction->GetPackedType()) {
587 case DataType::Type::kUint8:
588 DCHECK_EQ(16u, instruction->GetVectorLength());
589 __ psubusb(dst, src);
590 break;
591 case DataType::Type::kInt8:
592 DCHECK_EQ(16u, instruction->GetVectorLength());
593 __ psubsb(dst, src);
594 break;
595 case DataType::Type::kUint16:
596 DCHECK_EQ(8u, instruction->GetVectorLength());
597 __ psubusw(dst, src);
598 break;
599 case DataType::Type::kInt16:
600 DCHECK_EQ(8u, instruction->GetVectorLength());
601 __ psubsw(dst, src);
602 break;
603 default:
604 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
605 UNREACHABLE();
606 }
607 }
608
VisitVecMul(HVecMul * instruction)609 void LocationsBuilderX86::VisitVecMul(HVecMul* instruction) {
610 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
611 }
612
VisitVecMul(HVecMul * instruction)613 void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) {
614 LocationSummary* locations = instruction->GetLocations();
615 DCHECK(locations->InAt(0).Equals(locations->Out()));
616 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
617 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
618 switch (instruction->GetPackedType()) {
619 case DataType::Type::kUint16:
620 case DataType::Type::kInt16:
621 DCHECK_EQ(8u, instruction->GetVectorLength());
622 __ pmullw(dst, src);
623 break;
624 case DataType::Type::kInt32:
625 DCHECK_EQ(4u, instruction->GetVectorLength());
626 __ pmulld(dst, src);
627 break;
628 case DataType::Type::kFloat32:
629 DCHECK_EQ(4u, instruction->GetVectorLength());
630 __ mulps(dst, src);
631 break;
632 case DataType::Type::kFloat64:
633 DCHECK_EQ(2u, instruction->GetVectorLength());
634 __ mulpd(dst, src);
635 break;
636 default:
637 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
638 UNREACHABLE();
639 }
640 }
641
VisitVecDiv(HVecDiv * instruction)642 void LocationsBuilderX86::VisitVecDiv(HVecDiv* instruction) {
643 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
644 }
645
VisitVecDiv(HVecDiv * instruction)646 void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) {
647 LocationSummary* locations = instruction->GetLocations();
648 DCHECK(locations->InAt(0).Equals(locations->Out()));
649 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
650 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
651 switch (instruction->GetPackedType()) {
652 case DataType::Type::kFloat32:
653 DCHECK_EQ(4u, instruction->GetVectorLength());
654 __ divps(dst, src);
655 break;
656 case DataType::Type::kFloat64:
657 DCHECK_EQ(2u, instruction->GetVectorLength());
658 __ divpd(dst, src);
659 break;
660 default:
661 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
662 UNREACHABLE();
663 }
664 }
665
VisitVecMin(HVecMin * instruction)666 void LocationsBuilderX86::VisitVecMin(HVecMin* instruction) {
667 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
668 }
669
VisitVecMin(HVecMin * instruction)670 void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) {
671 LocationSummary* locations = instruction->GetLocations();
672 DCHECK(locations->InAt(0).Equals(locations->Out()));
673 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
674 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
675 switch (instruction->GetPackedType()) {
676 case DataType::Type::kUint8:
677 DCHECK_EQ(16u, instruction->GetVectorLength());
678 __ pminub(dst, src);
679 break;
680 case DataType::Type::kInt8:
681 DCHECK_EQ(16u, instruction->GetVectorLength());
682 __ pminsb(dst, src);
683 break;
684 case DataType::Type::kUint16:
685 DCHECK_EQ(8u, instruction->GetVectorLength());
686 __ pminuw(dst, src);
687 break;
688 case DataType::Type::kInt16:
689 DCHECK_EQ(8u, instruction->GetVectorLength());
690 __ pminsw(dst, src);
691 break;
692 case DataType::Type::kUint32:
693 DCHECK_EQ(4u, instruction->GetVectorLength());
694 __ pminud(dst, src);
695 break;
696 case DataType::Type::kInt32:
697 DCHECK_EQ(4u, instruction->GetVectorLength());
698 __ pminsd(dst, src);
699 break;
700 // Next cases are sloppy wrt 0.0 vs -0.0.
701 case DataType::Type::kFloat32:
702 DCHECK_EQ(4u, instruction->GetVectorLength());
703 __ minps(dst, src);
704 break;
705 case DataType::Type::kFloat64:
706 DCHECK_EQ(2u, instruction->GetVectorLength());
707 __ minpd(dst, src);
708 break;
709 default:
710 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
711 UNREACHABLE();
712 }
713 }
714
VisitVecMax(HVecMax * instruction)715 void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) {
716 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
717 }
718
VisitVecMax(HVecMax * instruction)719 void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) {
720 LocationSummary* locations = instruction->GetLocations();
721 DCHECK(locations->InAt(0).Equals(locations->Out()));
722 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
723 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
724 switch (instruction->GetPackedType()) {
725 case DataType::Type::kUint8:
726 DCHECK_EQ(16u, instruction->GetVectorLength());
727 __ pmaxub(dst, src);
728 break;
729 case DataType::Type::kInt8:
730 DCHECK_EQ(16u, instruction->GetVectorLength());
731 __ pmaxsb(dst, src);
732 break;
733 case DataType::Type::kUint16:
734 DCHECK_EQ(8u, instruction->GetVectorLength());
735 __ pmaxuw(dst, src);
736 break;
737 case DataType::Type::kInt16:
738 DCHECK_EQ(8u, instruction->GetVectorLength());
739 __ pmaxsw(dst, src);
740 break;
741 case DataType::Type::kUint32:
742 DCHECK_EQ(4u, instruction->GetVectorLength());
743 __ pmaxud(dst, src);
744 break;
745 case DataType::Type::kInt32:
746 DCHECK_EQ(4u, instruction->GetVectorLength());
747 __ pmaxsd(dst, src);
748 break;
749 // Next cases are sloppy wrt 0.0 vs -0.0.
750 case DataType::Type::kFloat32:
751 DCHECK_EQ(4u, instruction->GetVectorLength());
752 __ maxps(dst, src);
753 break;
754 case DataType::Type::kFloat64:
755 DCHECK_EQ(2u, instruction->GetVectorLength());
756 __ maxpd(dst, src);
757 break;
758 default:
759 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
760 UNREACHABLE();
761 }
762 }
763
VisitVecAnd(HVecAnd * instruction)764 void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) {
765 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
766 }
767
VisitVecAnd(HVecAnd * instruction)768 void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) {
769 LocationSummary* locations = instruction->GetLocations();
770 DCHECK(locations->InAt(0).Equals(locations->Out()));
771 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
772 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
773 switch (instruction->GetPackedType()) {
774 case DataType::Type::kBool:
775 case DataType::Type::kUint8:
776 case DataType::Type::kInt8:
777 case DataType::Type::kUint16:
778 case DataType::Type::kInt16:
779 case DataType::Type::kInt32:
780 case DataType::Type::kInt64:
781 DCHECK_LE(2u, instruction->GetVectorLength());
782 DCHECK_LE(instruction->GetVectorLength(), 16u);
783 __ pand(dst, src);
784 break;
785 case DataType::Type::kFloat32:
786 DCHECK_EQ(4u, instruction->GetVectorLength());
787 __ andps(dst, src);
788 break;
789 case DataType::Type::kFloat64:
790 DCHECK_EQ(2u, instruction->GetVectorLength());
791 __ andpd(dst, src);
792 break;
793 default:
794 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
795 UNREACHABLE();
796 }
797 }
798
VisitVecAndNot(HVecAndNot * instruction)799 void LocationsBuilderX86::VisitVecAndNot(HVecAndNot* instruction) {
800 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
801 }
802
VisitVecAndNot(HVecAndNot * instruction)803 void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) {
804 LocationSummary* locations = instruction->GetLocations();
805 DCHECK(locations->InAt(0).Equals(locations->Out()));
806 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
807 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
808 switch (instruction->GetPackedType()) {
809 case DataType::Type::kBool:
810 case DataType::Type::kUint8:
811 case DataType::Type::kInt8:
812 case DataType::Type::kUint16:
813 case DataType::Type::kInt16:
814 case DataType::Type::kInt32:
815 case DataType::Type::kInt64:
816 DCHECK_LE(2u, instruction->GetVectorLength());
817 DCHECK_LE(instruction->GetVectorLength(), 16u);
818 __ pandn(dst, src);
819 break;
820 case DataType::Type::kFloat32:
821 DCHECK_EQ(4u, instruction->GetVectorLength());
822 __ andnps(dst, src);
823 break;
824 case DataType::Type::kFloat64:
825 DCHECK_EQ(2u, instruction->GetVectorLength());
826 __ andnpd(dst, src);
827 break;
828 default:
829 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
830 UNREACHABLE();
831 }
832 }
833
VisitVecOr(HVecOr * instruction)834 void LocationsBuilderX86::VisitVecOr(HVecOr* instruction) {
835 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
836 }
837
VisitVecOr(HVecOr * instruction)838 void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) {
839 LocationSummary* locations = instruction->GetLocations();
840 DCHECK(locations->InAt(0).Equals(locations->Out()));
841 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
842 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
843 switch (instruction->GetPackedType()) {
844 case DataType::Type::kBool:
845 case DataType::Type::kUint8:
846 case DataType::Type::kInt8:
847 case DataType::Type::kUint16:
848 case DataType::Type::kInt16:
849 case DataType::Type::kInt32:
850 case DataType::Type::kInt64:
851 DCHECK_LE(2u, instruction->GetVectorLength());
852 DCHECK_LE(instruction->GetVectorLength(), 16u);
853 __ por(dst, src);
854 break;
855 case DataType::Type::kFloat32:
856 DCHECK_EQ(4u, instruction->GetVectorLength());
857 __ orps(dst, src);
858 break;
859 case DataType::Type::kFloat64:
860 DCHECK_EQ(2u, instruction->GetVectorLength());
861 __ orpd(dst, src);
862 break;
863 default:
864 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
865 UNREACHABLE();
866 }
867 }
868
VisitVecXor(HVecXor * instruction)869 void LocationsBuilderX86::VisitVecXor(HVecXor* instruction) {
870 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
871 }
872
VisitVecXor(HVecXor * instruction)873 void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) {
874 LocationSummary* locations = instruction->GetLocations();
875 DCHECK(locations->InAt(0).Equals(locations->Out()));
876 XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
877 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
878 switch (instruction->GetPackedType()) {
879 case DataType::Type::kBool:
880 case DataType::Type::kUint8:
881 case DataType::Type::kInt8:
882 case DataType::Type::kUint16:
883 case DataType::Type::kInt16:
884 case DataType::Type::kInt32:
885 case DataType::Type::kInt64:
886 DCHECK_LE(2u, instruction->GetVectorLength());
887 DCHECK_LE(instruction->GetVectorLength(), 16u);
888 __ pxor(dst, src);
889 break;
890 case DataType::Type::kFloat32:
891 DCHECK_EQ(4u, instruction->GetVectorLength());
892 __ xorps(dst, src);
893 break;
894 case DataType::Type::kFloat64:
895 DCHECK_EQ(2u, instruction->GetVectorLength());
896 __ xorpd(dst, src);
897 break;
898 default:
899 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
900 UNREACHABLE();
901 }
902 }
903
904 // Helper to set up locations for vector shift operations.
CreateVecShiftLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)905 static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
906 LocationSummary* locations = new (allocator) LocationSummary(instruction);
907 switch (instruction->GetPackedType()) {
908 case DataType::Type::kUint16:
909 case DataType::Type::kInt16:
910 case DataType::Type::kInt32:
911 case DataType::Type::kInt64:
912 locations->SetInAt(0, Location::RequiresFpuRegister());
913 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
914 locations->SetOut(Location::SameAsFirstInput());
915 break;
916 default:
917 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
918 UNREACHABLE();
919 }
920 }
921
VisitVecShl(HVecShl * instruction)922 void LocationsBuilderX86::VisitVecShl(HVecShl* instruction) {
923 CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
924 }
925
VisitVecShl(HVecShl * instruction)926 void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) {
927 LocationSummary* locations = instruction->GetLocations();
928 DCHECK(locations->InAt(0).Equals(locations->Out()));
929 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
930 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
931 switch (instruction->GetPackedType()) {
932 case DataType::Type::kUint16:
933 case DataType::Type::kInt16:
934 DCHECK_EQ(8u, instruction->GetVectorLength());
935 __ psllw(dst, Immediate(static_cast<uint8_t>(value)));
936 break;
937 case DataType::Type::kInt32:
938 DCHECK_EQ(4u, instruction->GetVectorLength());
939 __ pslld(dst, Immediate(static_cast<uint8_t>(value)));
940 break;
941 case DataType::Type::kInt64:
942 DCHECK_EQ(2u, instruction->GetVectorLength());
943 __ psllq(dst, Immediate(static_cast<uint8_t>(value)));
944 break;
945 default:
946 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
947 UNREACHABLE();
948 }
949 }
950
VisitVecShr(HVecShr * instruction)951 void LocationsBuilderX86::VisitVecShr(HVecShr* instruction) {
952 CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
953 }
954
VisitVecShr(HVecShr * instruction)955 void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) {
956 LocationSummary* locations = instruction->GetLocations();
957 DCHECK(locations->InAt(0).Equals(locations->Out()));
958 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
959 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
960 switch (instruction->GetPackedType()) {
961 case DataType::Type::kUint16:
962 case DataType::Type::kInt16:
963 DCHECK_EQ(8u, instruction->GetVectorLength());
964 __ psraw(dst, Immediate(static_cast<uint8_t>(value)));
965 break;
966 case DataType::Type::kInt32:
967 DCHECK_EQ(4u, instruction->GetVectorLength());
968 __ psrad(dst, Immediate(static_cast<uint8_t>(value)));
969 break;
970 default:
971 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
972 UNREACHABLE();
973 }
974 }
975
VisitVecUShr(HVecUShr * instruction)976 void LocationsBuilderX86::VisitVecUShr(HVecUShr* instruction) {
977 CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
978 }
979
VisitVecUShr(HVecUShr * instruction)980 void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) {
981 LocationSummary* locations = instruction->GetLocations();
982 DCHECK(locations->InAt(0).Equals(locations->Out()));
983 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
984 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
985 switch (instruction->GetPackedType()) {
986 case DataType::Type::kUint16:
987 case DataType::Type::kInt16:
988 DCHECK_EQ(8u, instruction->GetVectorLength());
989 __ psrlw(dst, Immediate(static_cast<uint8_t>(value)));
990 break;
991 case DataType::Type::kInt32:
992 DCHECK_EQ(4u, instruction->GetVectorLength());
993 __ psrld(dst, Immediate(static_cast<uint8_t>(value)));
994 break;
995 case DataType::Type::kInt64:
996 DCHECK_EQ(2u, instruction->GetVectorLength());
997 __ psrlq(dst, Immediate(static_cast<uint8_t>(value)));
998 break;
999 default:
1000 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1001 UNREACHABLE();
1002 }
1003 }
1004
VisitVecSetScalars(HVecSetScalars * instruction)1005 void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
1006 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
1007
1008 DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
1009
1010 HInstruction* input = instruction->InputAt(0);
1011 bool is_zero = IsZeroBitPattern(input);
1012
1013 switch (instruction->GetPackedType()) {
1014 case DataType::Type::kInt64:
1015 // Long needs extra temporary to load from register pairs.
1016 if (!is_zero) {
1017 locations->AddTemp(Location::RequiresFpuRegister());
1018 }
1019 FALLTHROUGH_INTENDED;
1020 case DataType::Type::kBool:
1021 case DataType::Type::kUint8:
1022 case DataType::Type::kInt8:
1023 case DataType::Type::kUint16:
1024 case DataType::Type::kInt16:
1025 case DataType::Type::kInt32:
1026 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
1027 : Location::RequiresRegister());
1028 locations->SetOut(Location::RequiresFpuRegister());
1029 break;
1030 case DataType::Type::kFloat32:
1031 case DataType::Type::kFloat64:
1032 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
1033 : Location::RequiresFpuRegister());
1034 locations->SetOut(Location::RequiresFpuRegister());
1035 break;
1036 default:
1037 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1038 UNREACHABLE();
1039 }
1040 }
1041
VisitVecSetScalars(HVecSetScalars * instruction)1042 void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) {
1043 LocationSummary* locations = instruction->GetLocations();
1044 XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
1045
1046 DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
1047
1048 // Zero out all other elements first.
1049 __ xorps(dst, dst);
1050
1051 // Shorthand for any type of zero.
1052 if (IsZeroBitPattern(instruction->InputAt(0))) {
1053 return;
1054 }
1055
1056 // Set required elements.
1057 switch (instruction->GetPackedType()) {
1058 case DataType::Type::kBool:
1059 case DataType::Type::kUint8:
1060 case DataType::Type::kInt8:
1061 case DataType::Type::kUint16:
1062 case DataType::Type::kInt16: // TODO: up to here, and?
1063 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1064 UNREACHABLE();
1065 case DataType::Type::kInt32:
1066 DCHECK_EQ(4u, instruction->GetVectorLength());
1067 __ movd(dst, locations->InAt(0).AsRegister<Register>());
1068 break;
1069 case DataType::Type::kInt64: {
1070 DCHECK_EQ(2u, instruction->GetVectorLength());
1071 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1072 __ xorps(tmp, tmp);
1073 __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
1074 __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
1075 __ punpckldq(dst, tmp);
1076 break;
1077 }
1078 case DataType::Type::kFloat32:
1079 DCHECK_EQ(4u, instruction->GetVectorLength());
1080 __ movss(dst, locations->InAt(1).AsFpuRegister<XmmRegister>());
1081 break;
1082 case DataType::Type::kFloat64:
1083 DCHECK_EQ(2u, instruction->GetVectorLength());
1084 __ movsd(dst, locations->InAt(1).AsFpuRegister<XmmRegister>());
1085 break;
1086 default:
1087 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1088 UNREACHABLE();
1089 }
1090 }
1091
1092 // Helper to set up locations for vector accumulations.
CreateVecAccumLocations(ArenaAllocator * allocator,HVecOperation * instruction)1093 static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
1094 LocationSummary* locations = new (allocator) LocationSummary(instruction);
1095 switch (instruction->GetPackedType()) {
1096 case DataType::Type::kUint8:
1097 case DataType::Type::kInt8:
1098 case DataType::Type::kUint16:
1099 case DataType::Type::kInt16:
1100 case DataType::Type::kInt32:
1101 case DataType::Type::kInt64:
1102 locations->SetInAt(0, Location::RequiresFpuRegister());
1103 locations->SetInAt(1, Location::RequiresFpuRegister());
1104 locations->SetInAt(2, Location::RequiresFpuRegister());
1105 locations->SetOut(Location::SameAsFirstInput());
1106 break;
1107 default:
1108 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1109 UNREACHABLE();
1110 }
1111 }
1112
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1113 void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1114 CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1115 }
1116
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1117 void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1118 // TODO: pmaddwd?
1119 LOG(FATAL) << "No SIMD for " << instruction->GetId();
1120 }
1121
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1122 void LocationsBuilderX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1123 CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1124 }
1125
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1126 void InstructionCodeGeneratorX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1127 // TODO: psadbw for unsigned?
1128 LOG(FATAL) << "No SIMD for " << instruction->GetId();
1129 }
1130
VisitVecDotProd(HVecDotProd * instruction)1131 void LocationsBuilderX86::VisitVecDotProd(HVecDotProd* instruction) {
1132 LOG(FATAL) << "No SIMD for " << instruction->GetId();
1133 }
1134
VisitVecDotProd(HVecDotProd * instruction)1135 void InstructionCodeGeneratorX86::VisitVecDotProd(HVecDotProd* instruction) {
1136 LOG(FATAL) << "No SIMD for " << instruction->GetId();
1137 }
1138
1139 // Helper to set up locations for vector memory operations.
CreateVecMemLocations(ArenaAllocator * allocator,HVecMemoryOperation * instruction,bool is_load)1140 static void CreateVecMemLocations(ArenaAllocator* allocator,
1141 HVecMemoryOperation* instruction,
1142 bool is_load) {
1143 LocationSummary* locations = new (allocator) LocationSummary(instruction);
1144 switch (instruction->GetPackedType()) {
1145 case DataType::Type::kBool:
1146 case DataType::Type::kUint8:
1147 case DataType::Type::kInt8:
1148 case DataType::Type::kUint16:
1149 case DataType::Type::kInt16:
1150 case DataType::Type::kInt32:
1151 case DataType::Type::kInt64:
1152 case DataType::Type::kFloat32:
1153 case DataType::Type::kFloat64:
1154 locations->SetInAt(0, Location::RequiresRegister());
1155 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
1156 if (is_load) {
1157 locations->SetOut(Location::RequiresFpuRegister());
1158 } else {
1159 locations->SetInAt(2, Location::RequiresFpuRegister());
1160 }
1161 break;
1162 default:
1163 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1164 UNREACHABLE();
1165 }
1166 }
1167
1168 // Helper to construct address for vector memory operations.
VecAddress(LocationSummary * locations,size_t size,bool is_string_char_at)1169 static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
1170 Location base = locations->InAt(0);
1171 Location index = locations->InAt(1);
1172 ScaleFactor scale = TIMES_1;
1173 switch (size) {
1174 case 2: scale = TIMES_2; break;
1175 case 4: scale = TIMES_4; break;
1176 case 8: scale = TIMES_8; break;
1177 default: break;
1178 }
1179 // Incorporate the string or array offset in the address computation.
1180 uint32_t offset = is_string_char_at
1181 ? mirror::String::ValueOffset().Uint32Value()
1182 : mirror::Array::DataOffset(size).Uint32Value();
1183 return CodeGeneratorX86::ArrayAddress(base.AsRegister<Register>(), index, scale, offset);
1184 }
1185
VisitVecLoad(HVecLoad * instruction)1186 void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) {
1187 CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
1188 // String load requires a temporary for the compressed load.
1189 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
1190 instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
1191 }
1192 }
1193
VisitVecLoad(HVecLoad * instruction)1194 void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
1195 LocationSummary* locations = instruction->GetLocations();
1196 size_t size = DataType::Size(instruction->GetPackedType());
1197 Address address = VecAddress(locations, size, instruction->IsStringCharAt());
1198 XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
1199 bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
1200 switch (instruction->GetPackedType()) {
1201 case DataType::Type::kInt16: // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
1202 case DataType::Type::kUint16:
1203 DCHECK_EQ(8u, instruction->GetVectorLength());
1204 // Special handling of compressed/uncompressed string load.
1205 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
1206 NearLabel done, not_compressed;
1207 XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1208 // Test compression bit.
1209 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1210 "Expecting 0=compressed, 1=uncompressed");
1211 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1212 __ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1));
1213 __ j(kNotZero, ¬_compressed);
1214 // Zero extend 8 compressed bytes into 8 chars.
1215 __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
1216 __ pxor(tmp, tmp);
1217 __ punpcklbw(reg, tmp);
1218 __ jmp(&done);
1219 // Load 4 direct uncompressed chars.
1220 __ Bind(¬_compressed);
1221 is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
1222 __ Bind(&done);
1223 return;
1224 }
1225 FALLTHROUGH_INTENDED;
1226 case DataType::Type::kBool:
1227 case DataType::Type::kUint8:
1228 case DataType::Type::kInt8:
1229 case DataType::Type::kInt32:
1230 case DataType::Type::kInt64:
1231 DCHECK_LE(2u, instruction->GetVectorLength());
1232 DCHECK_LE(instruction->GetVectorLength(), 16u);
1233 is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
1234 break;
1235 case DataType::Type::kFloat32:
1236 DCHECK_EQ(4u, instruction->GetVectorLength());
1237 is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
1238 break;
1239 case DataType::Type::kFloat64:
1240 DCHECK_EQ(2u, instruction->GetVectorLength());
1241 is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
1242 break;
1243 default:
1244 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1245 UNREACHABLE();
1246 }
1247 }
1248
VisitVecStore(HVecStore * instruction)1249 void LocationsBuilderX86::VisitVecStore(HVecStore* instruction) {
1250 CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
1251 }
1252
VisitVecStore(HVecStore * instruction)1253 void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) {
1254 LocationSummary* locations = instruction->GetLocations();
1255 size_t size = DataType::Size(instruction->GetPackedType());
1256 Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
1257 XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
1258 bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
1259 switch (instruction->GetPackedType()) {
1260 case DataType::Type::kBool:
1261 case DataType::Type::kUint8:
1262 case DataType::Type::kInt8:
1263 case DataType::Type::kUint16:
1264 case DataType::Type::kInt16:
1265 case DataType::Type::kInt32:
1266 case DataType::Type::kInt64:
1267 DCHECK_LE(2u, instruction->GetVectorLength());
1268 DCHECK_LE(instruction->GetVectorLength(), 16u);
1269 is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
1270 break;
1271 case DataType::Type::kFloat32:
1272 DCHECK_EQ(4u, instruction->GetVectorLength());
1273 is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
1274 break;
1275 case DataType::Type::kFloat64:
1276 DCHECK_EQ(2u, instruction->GetVectorLength());
1277 is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
1278 break;
1279 default:
1280 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1281 UNREACHABLE();
1282 }
1283 }
1284
1285 #undef __
1286
1287 } // namespace x86
1288 } // namespace art
1289