1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_x86.h"
18 
19 #include "mirror/array-inl.h"
20 #include "mirror/string.h"
21 
22 namespace art {
23 namespace x86 {
24 
25 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
26 #define __ down_cast<X86Assembler*>(GetAssembler())->  // NOLINT
27 
VisitVecReplicateScalar(HVecReplicateScalar * instruction)28 void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
29   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
30   HInstruction* input = instruction->InputAt(0);
31   bool is_zero = IsZeroBitPattern(input);
32   switch (instruction->GetPackedType()) {
33     case DataType::Type::kInt64:
34       // Long needs extra temporary to load from the register pair.
35       if (!is_zero) {
36         locations->AddTemp(Location::RequiresFpuRegister());
37       }
38       FALLTHROUGH_INTENDED;
39     case DataType::Type::kBool:
40     case DataType::Type::kUint8:
41     case DataType::Type::kInt8:
42     case DataType::Type::kUint16:
43     case DataType::Type::kInt16:
44     case DataType::Type::kInt32:
45       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
46                                     : Location::RequiresRegister());
47       locations->SetOut(Location::RequiresFpuRegister());
48       break;
49     case DataType::Type::kFloat32:
50     case DataType::Type::kFloat64:
51       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
52                                     : Location::RequiresFpuRegister());
53       locations->SetOut(is_zero ? Location::RequiresFpuRegister()
54                                 : Location::SameAsFirstInput());
55       break;
56     default:
57       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
58       UNREACHABLE();
59   }
60 }
61 
VisitVecReplicateScalar(HVecReplicateScalar * instruction)62 void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
63   LocationSummary* locations = instruction->GetLocations();
64   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
65 
66   // Shorthand for any type of zero.
67   if (IsZeroBitPattern(instruction->InputAt(0))) {
68     __ xorps(dst, dst);
69     return;
70   }
71 
72   switch (instruction->GetPackedType()) {
73     case DataType::Type::kBool:
74     case DataType::Type::kUint8:
75     case DataType::Type::kInt8:
76       DCHECK_EQ(16u, instruction->GetVectorLength());
77       __ movd(dst, locations->InAt(0).AsRegister<Register>());
78       __ punpcklbw(dst, dst);
79       __ punpcklwd(dst, dst);
80       __ pshufd(dst, dst, Immediate(0));
81       break;
82     case DataType::Type::kUint16:
83     case DataType::Type::kInt16:
84       DCHECK_EQ(8u, instruction->GetVectorLength());
85       __ movd(dst, locations->InAt(0).AsRegister<Register>());
86       __ punpcklwd(dst, dst);
87       __ pshufd(dst, dst, Immediate(0));
88       break;
89     case DataType::Type::kInt32:
90       DCHECK_EQ(4u, instruction->GetVectorLength());
91       __ movd(dst, locations->InAt(0).AsRegister<Register>());
92       __ pshufd(dst, dst, Immediate(0));
93       break;
94     case DataType::Type::kInt64: {
95       DCHECK_EQ(2u, instruction->GetVectorLength());
96       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
97       __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
98       __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
99       __ punpckldq(dst, tmp);
100       __ punpcklqdq(dst, dst);
101       break;
102     }
103     case DataType::Type::kFloat32:
104       DCHECK_EQ(4u, instruction->GetVectorLength());
105       DCHECK(locations->InAt(0).Equals(locations->Out()));
106       __ shufps(dst, dst, Immediate(0));
107       break;
108     case DataType::Type::kFloat64:
109       DCHECK_EQ(2u, instruction->GetVectorLength());
110       DCHECK(locations->InAt(0).Equals(locations->Out()));
111       __ shufpd(dst, dst, Immediate(0));
112       break;
113     default:
114       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
115       UNREACHABLE();
116   }
117 }
118 
VisitVecExtractScalar(HVecExtractScalar * instruction)119 void LocationsBuilderX86::VisitVecExtractScalar(HVecExtractScalar* instruction) {
120   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
121   switch (instruction->GetPackedType()) {
122     case DataType::Type::kInt64:
123       // Long needs extra temporary to store into the register pair.
124       locations->AddTemp(Location::RequiresFpuRegister());
125       FALLTHROUGH_INTENDED;
126     case DataType::Type::kBool:
127     case DataType::Type::kUint8:
128     case DataType::Type::kInt8:
129     case DataType::Type::kUint16:
130     case DataType::Type::kInt16:
131     case DataType::Type::kInt32:
132       locations->SetInAt(0, Location::RequiresFpuRegister());
133       locations->SetOut(Location::RequiresRegister());
134       break;
135     case DataType::Type::kFloat32:
136     case DataType::Type::kFloat64:
137       locations->SetInAt(0, Location::RequiresFpuRegister());
138       locations->SetOut(Location::SameAsFirstInput());
139       break;
140     default:
141       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
142       UNREACHABLE();
143   }
144 }
145 
VisitVecExtractScalar(HVecExtractScalar * instruction)146 void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instruction) {
147   LocationSummary* locations = instruction->GetLocations();
148   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
149   switch (instruction->GetPackedType()) {
150     case DataType::Type::kBool:
151     case DataType::Type::kUint8:
152     case DataType::Type::kInt8:
153     case DataType::Type::kUint16:
154     case DataType::Type::kInt16:  // TODO: up to here, and?
155       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
156       UNREACHABLE();
157     case DataType::Type::kInt32:
158       DCHECK_LE(4u, instruction->GetVectorLength());
159       DCHECK_LE(instruction->GetVectorLength(), 16u);
160       __ movd(locations->Out().AsRegister<Register>(), src);
161       break;
162     case DataType::Type::kInt64: {
163       DCHECK_EQ(2u, instruction->GetVectorLength());
164       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
165       __ movd(locations->Out().AsRegisterPairLow<Register>(), src);
166       __ pshufd(tmp, src, Immediate(1));
167       __ movd(locations->Out().AsRegisterPairHigh<Register>(), tmp);
168       break;
169     }
170     case DataType::Type::kFloat32:
171     case DataType::Type::kFloat64:
172       DCHECK_LE(2u, instruction->GetVectorLength());
173       DCHECK_LE(instruction->GetVectorLength(), 4u);
174       DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
175       break;
176     default:
177       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
178       UNREACHABLE();
179   }
180 }
181 
182 // Helper to set up locations for vector unary operations.
CreateVecUnOpLocations(ArenaAllocator * allocator,HVecUnaryOperation * instruction)183 static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
184   LocationSummary* locations = new (allocator) LocationSummary(instruction);
185   switch (instruction->GetPackedType()) {
186     case DataType::Type::kBool:
187     case DataType::Type::kUint8:
188     case DataType::Type::kInt8:
189     case DataType::Type::kUint16:
190     case DataType::Type::kInt16:
191     case DataType::Type::kInt32:
192     case DataType::Type::kInt64:
193     case DataType::Type::kFloat32:
194     case DataType::Type::kFloat64:
195       locations->SetInAt(0, Location::RequiresFpuRegister());
196       locations->SetOut(Location::RequiresFpuRegister());
197       break;
198     default:
199       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
200       UNREACHABLE();
201   }
202 }
203 
VisitVecReduce(HVecReduce * instruction)204 void LocationsBuilderX86::VisitVecReduce(HVecReduce* instruction) {
205   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
206   // Long reduction or min/max require a temporary.
207   if (instruction->GetPackedType() == DataType::Type::kInt64 ||
208       instruction->GetReductionKind() == HVecReduce::kMin ||
209       instruction->GetReductionKind() == HVecReduce::kMax) {
210     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
211   }
212 }
213 
VisitVecReduce(HVecReduce * instruction)214 void InstructionCodeGeneratorX86::VisitVecReduce(HVecReduce* instruction) {
215   LocationSummary* locations = instruction->GetLocations();
216   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
217   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
218   switch (instruction->GetPackedType()) {
219     case DataType::Type::kInt32:
220       DCHECK_EQ(4u, instruction->GetVectorLength());
221       switch (instruction->GetReductionKind()) {
222         case HVecReduce::kSum:
223           __ movaps(dst, src);
224           __ phaddd(dst, dst);
225           __ phaddd(dst, dst);
226           break;
227         case HVecReduce::kMin:
228         case HVecReduce::kMax:
229           // Historical note: We've had a broken implementation here. b/117863065
230           // Do not draw on the old code if we ever want to bring MIN/MAX reduction back.
231           LOG(FATAL) << "Unsupported reduction type.";
232       }
233       break;
234     case DataType::Type::kInt64: {
235       DCHECK_EQ(2u, instruction->GetVectorLength());
236       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
237       switch (instruction->GetReductionKind()) {
238         case HVecReduce::kSum:
239           __ movaps(tmp, src);
240           __ movaps(dst, src);
241           __ punpckhqdq(tmp, tmp);
242           __ paddq(dst, tmp);
243           break;
244         case HVecReduce::kMin:
245         case HVecReduce::kMax:
246           LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
247       }
248       break;
249     }
250     default:
251       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
252       UNREACHABLE();
253   }
254 }
255 
VisitVecCnv(HVecCnv * instruction)256 void LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) {
257   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
258 }
259 
VisitVecCnv(HVecCnv * instruction)260 void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) {
261   LocationSummary* locations = instruction->GetLocations();
262   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
263   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
264   DataType::Type from = instruction->GetInputType();
265   DataType::Type to = instruction->GetResultType();
266   if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
267     DCHECK_EQ(4u, instruction->GetVectorLength());
268     __ cvtdq2ps(dst, src);
269   } else {
270     LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
271   }
272 }
273 
VisitVecNeg(HVecNeg * instruction)274 void LocationsBuilderX86::VisitVecNeg(HVecNeg* instruction) {
275   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
276 }
277 
VisitVecNeg(HVecNeg * instruction)278 void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) {
279   LocationSummary* locations = instruction->GetLocations();
280   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
281   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
282   switch (instruction->GetPackedType()) {
283     case DataType::Type::kUint8:
284     case DataType::Type::kInt8:
285       DCHECK_EQ(16u, instruction->GetVectorLength());
286       __ pxor(dst, dst);
287       __ psubb(dst, src);
288       break;
289     case DataType::Type::kUint16:
290     case DataType::Type::kInt16:
291       DCHECK_EQ(8u, instruction->GetVectorLength());
292       __ pxor(dst, dst);
293       __ psubw(dst, src);
294       break;
295     case DataType::Type::kInt32:
296       DCHECK_EQ(4u, instruction->GetVectorLength());
297       __ pxor(dst, dst);
298       __ psubd(dst, src);
299       break;
300     case DataType::Type::kInt64:
301       DCHECK_EQ(2u, instruction->GetVectorLength());
302       __ pxor(dst, dst);
303       __ psubq(dst, src);
304       break;
305     case DataType::Type::kFloat32:
306       DCHECK_EQ(4u, instruction->GetVectorLength());
307       __ xorps(dst, dst);
308       __ subps(dst, src);
309       break;
310     case DataType::Type::kFloat64:
311       DCHECK_EQ(2u, instruction->GetVectorLength());
312       __ xorpd(dst, dst);
313       __ subpd(dst, src);
314       break;
315     default:
316       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
317       UNREACHABLE();
318   }
319 }
320 
VisitVecAbs(HVecAbs * instruction)321 void LocationsBuilderX86::VisitVecAbs(HVecAbs* instruction) {
322   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
323   // Integral-abs requires a temporary for the comparison.
324   if (instruction->GetPackedType() == DataType::Type::kInt32) {
325     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
326   }
327 }
328 
VisitVecAbs(HVecAbs * instruction)329 void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) {
330   LocationSummary* locations = instruction->GetLocations();
331   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
332   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
333   switch (instruction->GetPackedType()) {
334     case DataType::Type::kInt32: {
335       DCHECK_EQ(4u, instruction->GetVectorLength());
336       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
337       __ movaps(dst, src);
338       __ pxor(tmp, tmp);
339       __ pcmpgtd(tmp, dst);
340       __ pxor(dst, tmp);
341       __ psubd(dst, tmp);
342       break;
343     }
344     case DataType::Type::kFloat32:
345       DCHECK_EQ(4u, instruction->GetVectorLength());
346       __ pcmpeqb(dst, dst);  // all ones
347       __ psrld(dst, Immediate(1));
348       __ andps(dst, src);
349       break;
350     case DataType::Type::kFloat64:
351       DCHECK_EQ(2u, instruction->GetVectorLength());
352       __ pcmpeqb(dst, dst);  // all ones
353       __ psrlq(dst, Immediate(1));
354       __ andpd(dst, src);
355       break;
356     default:
357       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
358       UNREACHABLE();
359   }
360 }
361 
VisitVecNot(HVecNot * instruction)362 void LocationsBuilderX86::VisitVecNot(HVecNot* instruction) {
363   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
364   // Boolean-not requires a temporary to construct the 16 x one.
365   if (instruction->GetPackedType() == DataType::Type::kBool) {
366     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
367   }
368 }
369 
VisitVecNot(HVecNot * instruction)370 void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) {
371   LocationSummary* locations = instruction->GetLocations();
372   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
373   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
374   switch (instruction->GetPackedType()) {
375     case DataType::Type::kBool: {  // special case boolean-not
376       DCHECK_EQ(16u, instruction->GetVectorLength());
377       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
378       __ pxor(dst, dst);
379       __ pcmpeqb(tmp, tmp);  // all ones
380       __ psubb(dst, tmp);  // 16 x one
381       __ pxor(dst, src);
382       break;
383     }
384     case DataType::Type::kUint8:
385     case DataType::Type::kInt8:
386     case DataType::Type::kUint16:
387     case DataType::Type::kInt16:
388     case DataType::Type::kInt32:
389     case DataType::Type::kInt64:
390       DCHECK_LE(2u, instruction->GetVectorLength());
391       DCHECK_LE(instruction->GetVectorLength(), 16u);
392       __ pcmpeqb(dst, dst);  // all ones
393       __ pxor(dst, src);
394       break;
395     case DataType::Type::kFloat32:
396       DCHECK_EQ(4u, instruction->GetVectorLength());
397       __ pcmpeqb(dst, dst);  // all ones
398       __ xorps(dst, src);
399       break;
400     case DataType::Type::kFloat64:
401       DCHECK_EQ(2u, instruction->GetVectorLength());
402       __ pcmpeqb(dst, dst);  // all ones
403       __ xorpd(dst, src);
404       break;
405     default:
406       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
407       UNREACHABLE();
408   }
409 }
410 
411 // Helper to set up locations for vector binary operations.
CreateVecBinOpLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)412 static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
413   LocationSummary* locations = new (allocator) LocationSummary(instruction);
414   switch (instruction->GetPackedType()) {
415     case DataType::Type::kBool:
416     case DataType::Type::kUint8:
417     case DataType::Type::kInt8:
418     case DataType::Type::kUint16:
419     case DataType::Type::kInt16:
420     case DataType::Type::kInt32:
421     case DataType::Type::kInt64:
422     case DataType::Type::kFloat32:
423     case DataType::Type::kFloat64:
424       locations->SetInAt(0, Location::RequiresFpuRegister());
425       locations->SetInAt(1, Location::RequiresFpuRegister());
426       locations->SetOut(Location::SameAsFirstInput());
427       break;
428     default:
429       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
430       UNREACHABLE();
431   }
432 }
433 
VisitVecAdd(HVecAdd * instruction)434 void LocationsBuilderX86::VisitVecAdd(HVecAdd* instruction) {
435   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
436 }
437 
VisitVecAdd(HVecAdd * instruction)438 void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) {
439   LocationSummary* locations = instruction->GetLocations();
440   DCHECK(locations->InAt(0).Equals(locations->Out()));
441   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
442   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
443   switch (instruction->GetPackedType()) {
444     case DataType::Type::kUint8:
445     case DataType::Type::kInt8:
446       DCHECK_EQ(16u, instruction->GetVectorLength());
447       __ paddb(dst, src);
448       break;
449     case DataType::Type::kUint16:
450     case DataType::Type::kInt16:
451       DCHECK_EQ(8u, instruction->GetVectorLength());
452       __ paddw(dst, src);
453       break;
454     case DataType::Type::kInt32:
455       DCHECK_EQ(4u, instruction->GetVectorLength());
456       __ paddd(dst, src);
457       break;
458     case DataType::Type::kInt64:
459       DCHECK_EQ(2u, instruction->GetVectorLength());
460       __ paddq(dst, src);
461       break;
462     case DataType::Type::kFloat32:
463       DCHECK_EQ(4u, instruction->GetVectorLength());
464       __ addps(dst, src);
465       break;
466     case DataType::Type::kFloat64:
467       DCHECK_EQ(2u, instruction->GetVectorLength());
468       __ addpd(dst, src);
469       break;
470     default:
471       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
472       UNREACHABLE();
473   }
474 }
475 
VisitVecSaturationAdd(HVecSaturationAdd * instruction)476 void LocationsBuilderX86::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
477   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
478 }
479 
VisitVecSaturationAdd(HVecSaturationAdd * instruction)480 void InstructionCodeGeneratorX86::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
481   LocationSummary* locations = instruction->GetLocations();
482   DCHECK(locations->InAt(0).Equals(locations->Out()));
483   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
484   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
485   switch (instruction->GetPackedType()) {
486     case DataType::Type::kUint8:
487       DCHECK_EQ(16u, instruction->GetVectorLength());
488       __ paddusb(dst, src);
489       break;
490     case DataType::Type::kInt8:
491       DCHECK_EQ(16u, instruction->GetVectorLength());
492       __ paddsb(dst, src);
493       break;
494     case DataType::Type::kUint16:
495       DCHECK_EQ(8u, instruction->GetVectorLength());
496       __ paddusw(dst, src);
497       break;
498     case DataType::Type::kInt16:
499       DCHECK_EQ(8u, instruction->GetVectorLength());
500       __ paddsw(dst, src);
501       break;
502     default:
503       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
504       UNREACHABLE();
505   }
506 }
507 
VisitVecHalvingAdd(HVecHalvingAdd * instruction)508 void LocationsBuilderX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
509   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
510 }
511 
VisitVecHalvingAdd(HVecHalvingAdd * instruction)512 void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
513   LocationSummary* locations = instruction->GetLocations();
514   DCHECK(locations->InAt(0).Equals(locations->Out()));
515   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
516   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
517 
518   DCHECK(instruction->IsRounded());
519 
520   switch (instruction->GetPackedType()) {
521     case DataType::Type::kUint8:
522       DCHECK_EQ(16u, instruction->GetVectorLength());
523       __ pavgb(dst, src);
524       break;
525     case DataType::Type::kUint16:
526       DCHECK_EQ(8u, instruction->GetVectorLength());
527       __ pavgw(dst, src);
528       break;
529     default:
530       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
531       UNREACHABLE();
532   }
533 }
534 
VisitVecSub(HVecSub * instruction)535 void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) {
536   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
537 }
538 
VisitVecSub(HVecSub * instruction)539 void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) {
540   LocationSummary* locations = instruction->GetLocations();
541   DCHECK(locations->InAt(0).Equals(locations->Out()));
542   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
543   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
544   switch (instruction->GetPackedType()) {
545     case DataType::Type::kUint8:
546     case DataType::Type::kInt8:
547       DCHECK_EQ(16u, instruction->GetVectorLength());
548       __ psubb(dst, src);
549       break;
550     case DataType::Type::kUint16:
551     case DataType::Type::kInt16:
552       DCHECK_EQ(8u, instruction->GetVectorLength());
553       __ psubw(dst, src);
554       break;
555     case DataType::Type::kInt32:
556       DCHECK_EQ(4u, instruction->GetVectorLength());
557       __ psubd(dst, src);
558       break;
559     case DataType::Type::kInt64:
560       DCHECK_EQ(2u, instruction->GetVectorLength());
561       __ psubq(dst, src);
562       break;
563     case DataType::Type::kFloat32:
564       DCHECK_EQ(4u, instruction->GetVectorLength());
565       __ subps(dst, src);
566       break;
567     case DataType::Type::kFloat64:
568       DCHECK_EQ(2u, instruction->GetVectorLength());
569       __ subpd(dst, src);
570       break;
571     default:
572       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
573       UNREACHABLE();
574   }
575 }
576 
VisitVecSaturationSub(HVecSaturationSub * instruction)577 void LocationsBuilderX86::VisitVecSaturationSub(HVecSaturationSub* instruction) {
578   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
579 }
580 
VisitVecSaturationSub(HVecSaturationSub * instruction)581 void InstructionCodeGeneratorX86::VisitVecSaturationSub(HVecSaturationSub* instruction) {
582   LocationSummary* locations = instruction->GetLocations();
583   DCHECK(locations->InAt(0).Equals(locations->Out()));
584   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
585   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
586   switch (instruction->GetPackedType()) {
587     case DataType::Type::kUint8:
588       DCHECK_EQ(16u, instruction->GetVectorLength());
589       __ psubusb(dst, src);
590       break;
591     case DataType::Type::kInt8:
592       DCHECK_EQ(16u, instruction->GetVectorLength());
593       __ psubsb(dst, src);
594       break;
595     case DataType::Type::kUint16:
596       DCHECK_EQ(8u, instruction->GetVectorLength());
597       __ psubusw(dst, src);
598       break;
599     case DataType::Type::kInt16:
600       DCHECK_EQ(8u, instruction->GetVectorLength());
601       __ psubsw(dst, src);
602       break;
603     default:
604       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
605       UNREACHABLE();
606   }
607 }
608 
VisitVecMul(HVecMul * instruction)609 void LocationsBuilderX86::VisitVecMul(HVecMul* instruction) {
610   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
611 }
612 
VisitVecMul(HVecMul * instruction)613 void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) {
614   LocationSummary* locations = instruction->GetLocations();
615   DCHECK(locations->InAt(0).Equals(locations->Out()));
616   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
617   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
618   switch (instruction->GetPackedType()) {
619     case DataType::Type::kUint16:
620     case DataType::Type::kInt16:
621       DCHECK_EQ(8u, instruction->GetVectorLength());
622       __ pmullw(dst, src);
623       break;
624     case DataType::Type::kInt32:
625       DCHECK_EQ(4u, instruction->GetVectorLength());
626       __ pmulld(dst, src);
627       break;
628     case DataType::Type::kFloat32:
629       DCHECK_EQ(4u, instruction->GetVectorLength());
630       __ mulps(dst, src);
631       break;
632     case DataType::Type::kFloat64:
633       DCHECK_EQ(2u, instruction->GetVectorLength());
634       __ mulpd(dst, src);
635       break;
636     default:
637       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
638       UNREACHABLE();
639   }
640 }
641 
VisitVecDiv(HVecDiv * instruction)642 void LocationsBuilderX86::VisitVecDiv(HVecDiv* instruction) {
643   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
644 }
645 
VisitVecDiv(HVecDiv * instruction)646 void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) {
647   LocationSummary* locations = instruction->GetLocations();
648   DCHECK(locations->InAt(0).Equals(locations->Out()));
649   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
650   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
651   switch (instruction->GetPackedType()) {
652     case DataType::Type::kFloat32:
653       DCHECK_EQ(4u, instruction->GetVectorLength());
654       __ divps(dst, src);
655       break;
656     case DataType::Type::kFloat64:
657       DCHECK_EQ(2u, instruction->GetVectorLength());
658       __ divpd(dst, src);
659       break;
660     default:
661       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
662       UNREACHABLE();
663   }
664 }
665 
VisitVecMin(HVecMin * instruction)666 void LocationsBuilderX86::VisitVecMin(HVecMin* instruction) {
667   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
668 }
669 
VisitVecMin(HVecMin * instruction)670 void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) {
671   LocationSummary* locations = instruction->GetLocations();
672   DCHECK(locations->InAt(0).Equals(locations->Out()));
673   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
674   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
675   switch (instruction->GetPackedType()) {
676     case DataType::Type::kUint8:
677       DCHECK_EQ(16u, instruction->GetVectorLength());
678       __ pminub(dst, src);
679       break;
680     case DataType::Type::kInt8:
681       DCHECK_EQ(16u, instruction->GetVectorLength());
682       __ pminsb(dst, src);
683       break;
684     case DataType::Type::kUint16:
685       DCHECK_EQ(8u, instruction->GetVectorLength());
686       __ pminuw(dst, src);
687       break;
688     case DataType::Type::kInt16:
689       DCHECK_EQ(8u, instruction->GetVectorLength());
690       __ pminsw(dst, src);
691       break;
692     case DataType::Type::kUint32:
693       DCHECK_EQ(4u, instruction->GetVectorLength());
694       __ pminud(dst, src);
695       break;
696     case DataType::Type::kInt32:
697       DCHECK_EQ(4u, instruction->GetVectorLength());
698       __ pminsd(dst, src);
699       break;
700     // Next cases are sloppy wrt 0.0 vs -0.0.
701     case DataType::Type::kFloat32:
702       DCHECK_EQ(4u, instruction->GetVectorLength());
703       __ minps(dst, src);
704       break;
705     case DataType::Type::kFloat64:
706       DCHECK_EQ(2u, instruction->GetVectorLength());
707       __ minpd(dst, src);
708       break;
709     default:
710       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
711       UNREACHABLE();
712   }
713 }
714 
VisitVecMax(HVecMax * instruction)715 void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) {
716   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
717 }
718 
VisitVecMax(HVecMax * instruction)719 void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) {
720   LocationSummary* locations = instruction->GetLocations();
721   DCHECK(locations->InAt(0).Equals(locations->Out()));
722   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
723   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
724   switch (instruction->GetPackedType()) {
725     case DataType::Type::kUint8:
726       DCHECK_EQ(16u, instruction->GetVectorLength());
727       __ pmaxub(dst, src);
728       break;
729     case DataType::Type::kInt8:
730       DCHECK_EQ(16u, instruction->GetVectorLength());
731       __ pmaxsb(dst, src);
732       break;
733     case DataType::Type::kUint16:
734       DCHECK_EQ(8u, instruction->GetVectorLength());
735       __ pmaxuw(dst, src);
736       break;
737     case DataType::Type::kInt16:
738       DCHECK_EQ(8u, instruction->GetVectorLength());
739       __ pmaxsw(dst, src);
740       break;
741     case DataType::Type::kUint32:
742       DCHECK_EQ(4u, instruction->GetVectorLength());
743       __ pmaxud(dst, src);
744       break;
745     case DataType::Type::kInt32:
746       DCHECK_EQ(4u, instruction->GetVectorLength());
747       __ pmaxsd(dst, src);
748       break;
749     // Next cases are sloppy wrt 0.0 vs -0.0.
750     case DataType::Type::kFloat32:
751       DCHECK_EQ(4u, instruction->GetVectorLength());
752       __ maxps(dst, src);
753       break;
754     case DataType::Type::kFloat64:
755       DCHECK_EQ(2u, instruction->GetVectorLength());
756       __ maxpd(dst, src);
757       break;
758     default:
759       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
760       UNREACHABLE();
761   }
762 }
763 
VisitVecAnd(HVecAnd * instruction)764 void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) {
765   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
766 }
767 
VisitVecAnd(HVecAnd * instruction)768 void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) {
769   LocationSummary* locations = instruction->GetLocations();
770   DCHECK(locations->InAt(0).Equals(locations->Out()));
771   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
772   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
773   switch (instruction->GetPackedType()) {
774     case DataType::Type::kBool:
775     case DataType::Type::kUint8:
776     case DataType::Type::kInt8:
777     case DataType::Type::kUint16:
778     case DataType::Type::kInt16:
779     case DataType::Type::kInt32:
780     case DataType::Type::kInt64:
781       DCHECK_LE(2u, instruction->GetVectorLength());
782       DCHECK_LE(instruction->GetVectorLength(), 16u);
783       __ pand(dst, src);
784       break;
785     case DataType::Type::kFloat32:
786       DCHECK_EQ(4u, instruction->GetVectorLength());
787       __ andps(dst, src);
788       break;
789     case DataType::Type::kFloat64:
790       DCHECK_EQ(2u, instruction->GetVectorLength());
791       __ andpd(dst, src);
792       break;
793     default:
794       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
795       UNREACHABLE();
796   }
797 }
798 
VisitVecAndNot(HVecAndNot * instruction)799 void LocationsBuilderX86::VisitVecAndNot(HVecAndNot* instruction) {
800   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
801 }
802 
VisitVecAndNot(HVecAndNot * instruction)803 void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) {
804   LocationSummary* locations = instruction->GetLocations();
805   DCHECK(locations->InAt(0).Equals(locations->Out()));
806   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
807   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
808   switch (instruction->GetPackedType()) {
809     case DataType::Type::kBool:
810     case DataType::Type::kUint8:
811     case DataType::Type::kInt8:
812     case DataType::Type::kUint16:
813     case DataType::Type::kInt16:
814     case DataType::Type::kInt32:
815     case DataType::Type::kInt64:
816       DCHECK_LE(2u, instruction->GetVectorLength());
817       DCHECK_LE(instruction->GetVectorLength(), 16u);
818       __ pandn(dst, src);
819       break;
820     case DataType::Type::kFloat32:
821       DCHECK_EQ(4u, instruction->GetVectorLength());
822       __ andnps(dst, src);
823       break;
824     case DataType::Type::kFloat64:
825       DCHECK_EQ(2u, instruction->GetVectorLength());
826       __ andnpd(dst, src);
827       break;
828     default:
829       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
830       UNREACHABLE();
831   }
832 }
833 
VisitVecOr(HVecOr * instruction)834 void LocationsBuilderX86::VisitVecOr(HVecOr* instruction) {
835   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
836 }
837 
VisitVecOr(HVecOr * instruction)838 void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) {
839   LocationSummary* locations = instruction->GetLocations();
840   DCHECK(locations->InAt(0).Equals(locations->Out()));
841   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
842   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
843   switch (instruction->GetPackedType()) {
844     case DataType::Type::kBool:
845     case DataType::Type::kUint8:
846     case DataType::Type::kInt8:
847     case DataType::Type::kUint16:
848     case DataType::Type::kInt16:
849     case DataType::Type::kInt32:
850     case DataType::Type::kInt64:
851       DCHECK_LE(2u, instruction->GetVectorLength());
852       DCHECK_LE(instruction->GetVectorLength(), 16u);
853       __ por(dst, src);
854       break;
855     case DataType::Type::kFloat32:
856       DCHECK_EQ(4u, instruction->GetVectorLength());
857       __ orps(dst, src);
858       break;
859     case DataType::Type::kFloat64:
860       DCHECK_EQ(2u, instruction->GetVectorLength());
861       __ orpd(dst, src);
862       break;
863     default:
864       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
865       UNREACHABLE();
866   }
867 }
868 
VisitVecXor(HVecXor * instruction)869 void LocationsBuilderX86::VisitVecXor(HVecXor* instruction) {
870   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
871 }
872 
VisitVecXor(HVecXor * instruction)873 void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) {
874   LocationSummary* locations = instruction->GetLocations();
875   DCHECK(locations->InAt(0).Equals(locations->Out()));
876   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
877   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
878   switch (instruction->GetPackedType()) {
879     case DataType::Type::kBool:
880     case DataType::Type::kUint8:
881     case DataType::Type::kInt8:
882     case DataType::Type::kUint16:
883     case DataType::Type::kInt16:
884     case DataType::Type::kInt32:
885     case DataType::Type::kInt64:
886       DCHECK_LE(2u, instruction->GetVectorLength());
887       DCHECK_LE(instruction->GetVectorLength(), 16u);
888       __ pxor(dst, src);
889       break;
890     case DataType::Type::kFloat32:
891       DCHECK_EQ(4u, instruction->GetVectorLength());
892       __ xorps(dst, src);
893       break;
894     case DataType::Type::kFloat64:
895       DCHECK_EQ(2u, instruction->GetVectorLength());
896       __ xorpd(dst, src);
897       break;
898     default:
899       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
900       UNREACHABLE();
901   }
902 }
903 
904 // Helper to set up locations for vector shift operations.
CreateVecShiftLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)905 static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
906   LocationSummary* locations = new (allocator) LocationSummary(instruction);
907   switch (instruction->GetPackedType()) {
908     case DataType::Type::kUint16:
909     case DataType::Type::kInt16:
910     case DataType::Type::kInt32:
911     case DataType::Type::kInt64:
912       locations->SetInAt(0, Location::RequiresFpuRegister());
913       locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
914       locations->SetOut(Location::SameAsFirstInput());
915       break;
916     default:
917       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
918       UNREACHABLE();
919   }
920 }
921 
VisitVecShl(HVecShl * instruction)922 void LocationsBuilderX86::VisitVecShl(HVecShl* instruction) {
923   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
924 }
925 
VisitVecShl(HVecShl * instruction)926 void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) {
927   LocationSummary* locations = instruction->GetLocations();
928   DCHECK(locations->InAt(0).Equals(locations->Out()));
929   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
930   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
931   switch (instruction->GetPackedType()) {
932     case DataType::Type::kUint16:
933     case DataType::Type::kInt16:
934       DCHECK_EQ(8u, instruction->GetVectorLength());
935       __ psllw(dst, Immediate(static_cast<uint8_t>(value)));
936       break;
937     case DataType::Type::kInt32:
938       DCHECK_EQ(4u, instruction->GetVectorLength());
939       __ pslld(dst, Immediate(static_cast<uint8_t>(value)));
940       break;
941     case DataType::Type::kInt64:
942       DCHECK_EQ(2u, instruction->GetVectorLength());
943       __ psllq(dst, Immediate(static_cast<uint8_t>(value)));
944       break;
945     default:
946       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
947       UNREACHABLE();
948   }
949 }
950 
VisitVecShr(HVecShr * instruction)951 void LocationsBuilderX86::VisitVecShr(HVecShr* instruction) {
952   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
953 }
954 
VisitVecShr(HVecShr * instruction)955 void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) {
956   LocationSummary* locations = instruction->GetLocations();
957   DCHECK(locations->InAt(0).Equals(locations->Out()));
958   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
959   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
960   switch (instruction->GetPackedType()) {
961     case DataType::Type::kUint16:
962     case DataType::Type::kInt16:
963       DCHECK_EQ(8u, instruction->GetVectorLength());
964       __ psraw(dst, Immediate(static_cast<uint8_t>(value)));
965       break;
966     case DataType::Type::kInt32:
967       DCHECK_EQ(4u, instruction->GetVectorLength());
968       __ psrad(dst, Immediate(static_cast<uint8_t>(value)));
969       break;
970     default:
971       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
972       UNREACHABLE();
973   }
974 }
975 
VisitVecUShr(HVecUShr * instruction)976 void LocationsBuilderX86::VisitVecUShr(HVecUShr* instruction) {
977   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
978 }
979 
VisitVecUShr(HVecUShr * instruction)980 void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) {
981   LocationSummary* locations = instruction->GetLocations();
982   DCHECK(locations->InAt(0).Equals(locations->Out()));
983   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
984   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
985   switch (instruction->GetPackedType()) {
986     case DataType::Type::kUint16:
987     case DataType::Type::kInt16:
988       DCHECK_EQ(8u, instruction->GetVectorLength());
989       __ psrlw(dst, Immediate(static_cast<uint8_t>(value)));
990       break;
991     case DataType::Type::kInt32:
992       DCHECK_EQ(4u, instruction->GetVectorLength());
993       __ psrld(dst, Immediate(static_cast<uint8_t>(value)));
994       break;
995     case DataType::Type::kInt64:
996       DCHECK_EQ(2u, instruction->GetVectorLength());
997       __ psrlq(dst, Immediate(static_cast<uint8_t>(value)));
998       break;
999     default:
1000       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1001       UNREACHABLE();
1002   }
1003 }
1004 
VisitVecSetScalars(HVecSetScalars * instruction)1005 void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
1006   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
1007 
1008   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
1009 
1010   HInstruction* input = instruction->InputAt(0);
1011   bool is_zero = IsZeroBitPattern(input);
1012 
1013   switch (instruction->GetPackedType()) {
1014     case DataType::Type::kInt64:
1015       // Long needs extra temporary to load from register pairs.
1016       if (!is_zero) {
1017         locations->AddTemp(Location::RequiresFpuRegister());
1018       }
1019       FALLTHROUGH_INTENDED;
1020     case DataType::Type::kBool:
1021     case DataType::Type::kUint8:
1022     case DataType::Type::kInt8:
1023     case DataType::Type::kUint16:
1024     case DataType::Type::kInt16:
1025     case DataType::Type::kInt32:
1026       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
1027                                     : Location::RequiresRegister());
1028       locations->SetOut(Location::RequiresFpuRegister());
1029       break;
1030     case DataType::Type::kFloat32:
1031     case DataType::Type::kFloat64:
1032       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
1033                                     : Location::RequiresFpuRegister());
1034       locations->SetOut(Location::RequiresFpuRegister());
1035       break;
1036     default:
1037       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1038       UNREACHABLE();
1039   }
1040 }
1041 
VisitVecSetScalars(HVecSetScalars * instruction)1042 void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) {
1043   LocationSummary* locations = instruction->GetLocations();
1044   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
1045 
1046   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
1047 
1048   // Zero out all other elements first.
1049   __ xorps(dst, dst);
1050 
1051   // Shorthand for any type of zero.
1052   if (IsZeroBitPattern(instruction->InputAt(0))) {
1053     return;
1054   }
1055 
1056   // Set required elements.
1057   switch (instruction->GetPackedType()) {
1058     case DataType::Type::kBool:
1059     case DataType::Type::kUint8:
1060     case DataType::Type::kInt8:
1061     case DataType::Type::kUint16:
1062     case DataType::Type::kInt16:  // TODO: up to here, and?
1063       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1064       UNREACHABLE();
1065     case DataType::Type::kInt32:
1066       DCHECK_EQ(4u, instruction->GetVectorLength());
1067       __ movd(dst, locations->InAt(0).AsRegister<Register>());
1068       break;
1069     case DataType::Type::kInt64: {
1070       DCHECK_EQ(2u, instruction->GetVectorLength());
1071       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1072       __ xorps(tmp, tmp);
1073       __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
1074       __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
1075       __ punpckldq(dst, tmp);
1076       break;
1077     }
1078     case DataType::Type::kFloat32:
1079       DCHECK_EQ(4u, instruction->GetVectorLength());
1080       __ movss(dst, locations->InAt(1).AsFpuRegister<XmmRegister>());
1081       break;
1082     case DataType::Type::kFloat64:
1083       DCHECK_EQ(2u, instruction->GetVectorLength());
1084       __ movsd(dst, locations->InAt(1).AsFpuRegister<XmmRegister>());
1085       break;
1086     default:
1087       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1088       UNREACHABLE();
1089   }
1090 }
1091 
1092 // Helper to set up locations for vector accumulations.
CreateVecAccumLocations(ArenaAllocator * allocator,HVecOperation * instruction)1093 static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
1094   LocationSummary* locations = new (allocator) LocationSummary(instruction);
1095   switch (instruction->GetPackedType()) {
1096     case DataType::Type::kUint8:
1097     case DataType::Type::kInt8:
1098     case DataType::Type::kUint16:
1099     case DataType::Type::kInt16:
1100     case DataType::Type::kInt32:
1101     case DataType::Type::kInt64:
1102       locations->SetInAt(0, Location::RequiresFpuRegister());
1103       locations->SetInAt(1, Location::RequiresFpuRegister());
1104       locations->SetInAt(2, Location::RequiresFpuRegister());
1105       locations->SetOut(Location::SameAsFirstInput());
1106       break;
1107     default:
1108       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1109       UNREACHABLE();
1110   }
1111 }
1112 
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1113 void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1114   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1115 }
1116 
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1117 void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1118   // TODO: pmaddwd?
1119   LOG(FATAL) << "No SIMD for " << instruction->GetId();
1120 }
1121 
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1122 void LocationsBuilderX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1123   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1124 }
1125 
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1126 void InstructionCodeGeneratorX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1127   // TODO: psadbw for unsigned?
1128   LOG(FATAL) << "No SIMD for " << instruction->GetId();
1129 }
1130 
VisitVecDotProd(HVecDotProd * instruction)1131 void LocationsBuilderX86::VisitVecDotProd(HVecDotProd* instruction) {
1132   LOG(FATAL) << "No SIMD for " << instruction->GetId();
1133 }
1134 
VisitVecDotProd(HVecDotProd * instruction)1135 void InstructionCodeGeneratorX86::VisitVecDotProd(HVecDotProd* instruction) {
1136   LOG(FATAL) << "No SIMD for " << instruction->GetId();
1137 }
1138 
1139 // Helper to set up locations for vector memory operations.
CreateVecMemLocations(ArenaAllocator * allocator,HVecMemoryOperation * instruction,bool is_load)1140 static void CreateVecMemLocations(ArenaAllocator* allocator,
1141                                   HVecMemoryOperation* instruction,
1142                                   bool is_load) {
1143   LocationSummary* locations = new (allocator) LocationSummary(instruction);
1144   switch (instruction->GetPackedType()) {
1145     case DataType::Type::kBool:
1146     case DataType::Type::kUint8:
1147     case DataType::Type::kInt8:
1148     case DataType::Type::kUint16:
1149     case DataType::Type::kInt16:
1150     case DataType::Type::kInt32:
1151     case DataType::Type::kInt64:
1152     case DataType::Type::kFloat32:
1153     case DataType::Type::kFloat64:
1154       locations->SetInAt(0, Location::RequiresRegister());
1155       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
1156       if (is_load) {
1157         locations->SetOut(Location::RequiresFpuRegister());
1158       } else {
1159         locations->SetInAt(2, Location::RequiresFpuRegister());
1160       }
1161       break;
1162     default:
1163       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1164       UNREACHABLE();
1165   }
1166 }
1167 
1168 // Helper to construct address for vector memory operations.
VecAddress(LocationSummary * locations,size_t size,bool is_string_char_at)1169 static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
1170   Location base = locations->InAt(0);
1171   Location index = locations->InAt(1);
1172   ScaleFactor scale = TIMES_1;
1173   switch (size) {
1174     case 2: scale = TIMES_2; break;
1175     case 4: scale = TIMES_4; break;
1176     case 8: scale = TIMES_8; break;
1177     default: break;
1178   }
1179   // Incorporate the string or array offset in the address computation.
1180   uint32_t offset = is_string_char_at
1181       ? mirror::String::ValueOffset().Uint32Value()
1182       : mirror::Array::DataOffset(size).Uint32Value();
1183   return CodeGeneratorX86::ArrayAddress(base.AsRegister<Register>(), index, scale, offset);
1184 }
1185 
VisitVecLoad(HVecLoad * instruction)1186 void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) {
1187   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
1188   // String load requires a temporary for the compressed load.
1189   if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
1190     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
1191   }
1192 }
1193 
VisitVecLoad(HVecLoad * instruction)1194 void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
1195   LocationSummary* locations = instruction->GetLocations();
1196   size_t size = DataType::Size(instruction->GetPackedType());
1197   Address address = VecAddress(locations, size, instruction->IsStringCharAt());
1198   XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
1199   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
1200   switch (instruction->GetPackedType()) {
1201     case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
1202     case DataType::Type::kUint16:
1203       DCHECK_EQ(8u, instruction->GetVectorLength());
1204       // Special handling of compressed/uncompressed string load.
1205       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
1206         NearLabel done, not_compressed;
1207         XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1208         // Test compression bit.
1209         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1210                       "Expecting 0=compressed, 1=uncompressed");
1211         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1212         __ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1));
1213         __ j(kNotZero, &not_compressed);
1214         // Zero extend 8 compressed bytes into 8 chars.
1215         __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
1216         __ pxor(tmp, tmp);
1217         __ punpcklbw(reg, tmp);
1218         __ jmp(&done);
1219         // Load 4 direct uncompressed chars.
1220         __ Bind(&not_compressed);
1221         is_aligned16 ?  __ movdqa(reg, address) :  __ movdqu(reg, address);
1222         __ Bind(&done);
1223         return;
1224       }
1225       FALLTHROUGH_INTENDED;
1226     case DataType::Type::kBool:
1227     case DataType::Type::kUint8:
1228     case DataType::Type::kInt8:
1229     case DataType::Type::kInt32:
1230     case DataType::Type::kInt64:
1231       DCHECK_LE(2u, instruction->GetVectorLength());
1232       DCHECK_LE(instruction->GetVectorLength(), 16u);
1233       is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
1234       break;
1235     case DataType::Type::kFloat32:
1236       DCHECK_EQ(4u, instruction->GetVectorLength());
1237       is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
1238       break;
1239     case DataType::Type::kFloat64:
1240       DCHECK_EQ(2u, instruction->GetVectorLength());
1241       is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
1242       break;
1243     default:
1244       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1245       UNREACHABLE();
1246   }
1247 }
1248 
VisitVecStore(HVecStore * instruction)1249 void LocationsBuilderX86::VisitVecStore(HVecStore* instruction) {
1250   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
1251 }
1252 
VisitVecStore(HVecStore * instruction)1253 void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) {
1254   LocationSummary* locations = instruction->GetLocations();
1255   size_t size = DataType::Size(instruction->GetPackedType());
1256   Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
1257   XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
1258   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
1259   switch (instruction->GetPackedType()) {
1260     case DataType::Type::kBool:
1261     case DataType::Type::kUint8:
1262     case DataType::Type::kInt8:
1263     case DataType::Type::kUint16:
1264     case DataType::Type::kInt16:
1265     case DataType::Type::kInt32:
1266     case DataType::Type::kInt64:
1267       DCHECK_LE(2u, instruction->GetVectorLength());
1268       DCHECK_LE(instruction->GetVectorLength(), 16u);
1269       is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
1270       break;
1271     case DataType::Type::kFloat32:
1272       DCHECK_EQ(4u, instruction->GetVectorLength());
1273       is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
1274       break;
1275     case DataType::Type::kFloat64:
1276       DCHECK_EQ(2u, instruction->GetVectorLength());
1277       is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
1278       break;
1279     default:
1280       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1281       UNREACHABLE();
1282   }
1283 }
1284 
1285 #undef __
1286 
1287 }  // namespace x86
1288 }  // namespace art
1289