1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_x86.h"
18 
19 #include "mirror/array-inl.h"
20 #include "mirror/string.h"
21 
22 namespace art {
23 namespace x86 {
24 
25 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
26 #define __ down_cast<X86Assembler*>(GetAssembler())->  // NOLINT
27 
VisitVecReplicateScalar(HVecReplicateScalar * instruction)28 void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
29   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
30   HInstruction* input = instruction->InputAt(0);
31   bool is_zero = IsZeroBitPattern(input);
32   switch (instruction->GetPackedType()) {
33     case DataType::Type::kInt64:
34       // Long needs extra temporary to load from the register pair.
35       if (!is_zero) {
36         locations->AddTemp(Location::RequiresFpuRegister());
37       }
38       FALLTHROUGH_INTENDED;
39     case DataType::Type::kBool:
40     case DataType::Type::kUint8:
41     case DataType::Type::kInt8:
42     case DataType::Type::kUint16:
43     case DataType::Type::kInt16:
44     case DataType::Type::kInt32:
45       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
46                                     : Location::RequiresRegister());
47       locations->SetOut(Location::RequiresFpuRegister());
48       break;
49     case DataType::Type::kFloat32:
50     case DataType::Type::kFloat64:
51       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
52                                     : Location::RequiresFpuRegister());
53       locations->SetOut(is_zero ? Location::RequiresFpuRegister()
54                                 : Location::SameAsFirstInput());
55       break;
56     default:
57       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
58       UNREACHABLE();
59   }
60 }
61 
VisitVecReplicateScalar(HVecReplicateScalar * instruction)62 void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
63   LocationSummary* locations = instruction->GetLocations();
64   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
65 
66   bool cpu_has_avx = CpuHasAvxFeatureFlag();
67   // Shorthand for any type of zero.
68   if (IsZeroBitPattern(instruction->InputAt(0))) {
69     cpu_has_avx ? __ vxorps(dst, dst, dst) : __ xorps(dst, dst);
70     return;
71   }
72 
73   switch (instruction->GetPackedType()) {
74     case DataType::Type::kBool:
75     case DataType::Type::kUint8:
76     case DataType::Type::kInt8:
77       DCHECK_EQ(16u, instruction->GetVectorLength());
78       __ movd(dst, locations->InAt(0).AsRegister<Register>());
79       __ punpcklbw(dst, dst);
80       __ punpcklwd(dst, dst);
81       __ pshufd(dst, dst, Immediate(0));
82       break;
83     case DataType::Type::kUint16:
84     case DataType::Type::kInt16:
85       DCHECK_EQ(8u, instruction->GetVectorLength());
86       __ movd(dst, locations->InAt(0).AsRegister<Register>());
87       __ punpcklwd(dst, dst);
88       __ pshufd(dst, dst, Immediate(0));
89       break;
90     case DataType::Type::kInt32:
91       DCHECK_EQ(4u, instruction->GetVectorLength());
92       __ movd(dst, locations->InAt(0).AsRegister<Register>());
93       __ pshufd(dst, dst, Immediate(0));
94       break;
95     case DataType::Type::kInt64: {
96       DCHECK_EQ(2u, instruction->GetVectorLength());
97       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
98       __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
99       __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
100       __ punpckldq(dst, tmp);
101       __ punpcklqdq(dst, dst);
102       break;
103     }
104     case DataType::Type::kFloat32:
105       DCHECK_EQ(4u, instruction->GetVectorLength());
106       DCHECK(locations->InAt(0).Equals(locations->Out()));
107       __ shufps(dst, dst, Immediate(0));
108       break;
109     case DataType::Type::kFloat64:
110       DCHECK_EQ(2u, instruction->GetVectorLength());
111       DCHECK(locations->InAt(0).Equals(locations->Out()));
112       __ shufpd(dst, dst, Immediate(0));
113       break;
114     default:
115       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
116       UNREACHABLE();
117   }
118 }
119 
VisitVecExtractScalar(HVecExtractScalar * instruction)120 void LocationsBuilderX86::VisitVecExtractScalar(HVecExtractScalar* instruction) {
121   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
122   switch (instruction->GetPackedType()) {
123     case DataType::Type::kInt64:
124       // Long needs extra temporary to store into the register pair.
125       locations->AddTemp(Location::RequiresFpuRegister());
126       FALLTHROUGH_INTENDED;
127     case DataType::Type::kBool:
128     case DataType::Type::kUint8:
129     case DataType::Type::kInt8:
130     case DataType::Type::kUint16:
131     case DataType::Type::kInt16:
132     case DataType::Type::kInt32:
133       locations->SetInAt(0, Location::RequiresFpuRegister());
134       locations->SetOut(Location::RequiresRegister());
135       break;
136     case DataType::Type::kFloat32:
137     case DataType::Type::kFloat64:
138       locations->SetInAt(0, Location::RequiresFpuRegister());
139       locations->SetOut(Location::SameAsFirstInput());
140       break;
141     default:
142       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
143       UNREACHABLE();
144   }
145 }
146 
VisitVecExtractScalar(HVecExtractScalar * instruction)147 void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instruction) {
148   LocationSummary* locations = instruction->GetLocations();
149   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
150   switch (instruction->GetPackedType()) {
151     case DataType::Type::kBool:
152     case DataType::Type::kUint8:
153     case DataType::Type::kInt8:
154     case DataType::Type::kUint16:
155     case DataType::Type::kInt16:  // TODO: up to here, and?
156       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
157       UNREACHABLE();
158     case DataType::Type::kInt32:
159       DCHECK_LE(4u, instruction->GetVectorLength());
160       DCHECK_LE(instruction->GetVectorLength(), 16u);
161       __ movd(locations->Out().AsRegister<Register>(), src);
162       break;
163     case DataType::Type::kInt64: {
164       DCHECK_EQ(2u, instruction->GetVectorLength());
165       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
166       __ movd(locations->Out().AsRegisterPairLow<Register>(), src);
167       __ pshufd(tmp, src, Immediate(1));
168       __ movd(locations->Out().AsRegisterPairHigh<Register>(), tmp);
169       break;
170     }
171     case DataType::Type::kFloat32:
172     case DataType::Type::kFloat64:
173       DCHECK_LE(2u, instruction->GetVectorLength());
174       DCHECK_LE(instruction->GetVectorLength(), 4u);
175       DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
176       break;
177     default:
178       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
179       UNREACHABLE();
180   }
181 }
182 
183 // Helper to set up locations for vector unary operations.
CreateVecUnOpLocations(ArenaAllocator * allocator,HVecUnaryOperation * instruction)184 static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
185   LocationSummary* locations = new (allocator) LocationSummary(instruction);
186   switch (instruction->GetPackedType()) {
187     case DataType::Type::kBool:
188     case DataType::Type::kUint8:
189     case DataType::Type::kInt8:
190     case DataType::Type::kUint16:
191     case DataType::Type::kInt16:
192     case DataType::Type::kInt32:
193     case DataType::Type::kInt64:
194     case DataType::Type::kFloat32:
195     case DataType::Type::kFloat64:
196       locations->SetInAt(0, Location::RequiresFpuRegister());
197       locations->SetOut(Location::RequiresFpuRegister());
198       break;
199     default:
200       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
201       UNREACHABLE();
202   }
203 }
204 
VisitVecReduce(HVecReduce * instruction)205 void LocationsBuilderX86::VisitVecReduce(HVecReduce* instruction) {
206   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
207   // Long reduction or min/max require a temporary.
208   if (instruction->GetPackedType() == DataType::Type::kInt64 ||
209       instruction->GetReductionKind() == HVecReduce::kMin ||
210       instruction->GetReductionKind() == HVecReduce::kMax) {
211     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
212   }
213 }
214 
VisitVecReduce(HVecReduce * instruction)215 void InstructionCodeGeneratorX86::VisitVecReduce(HVecReduce* instruction) {
216   LocationSummary* locations = instruction->GetLocations();
217   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
218   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
219   switch (instruction->GetPackedType()) {
220     case DataType::Type::kInt32:
221       DCHECK_EQ(4u, instruction->GetVectorLength());
222       switch (instruction->GetReductionKind()) {
223         case HVecReduce::kSum:
224           __ movaps(dst, src);
225           __ phaddd(dst, dst);
226           __ phaddd(dst, dst);
227           break;
228         case HVecReduce::kMin:
229         case HVecReduce::kMax:
230           // Historical note: We've had a broken implementation here. b/117863065
231           // Do not draw on the old code if we ever want to bring MIN/MAX reduction back.
232           LOG(FATAL) << "Unsupported reduction type.";
233       }
234       break;
235     case DataType::Type::kInt64: {
236       DCHECK_EQ(2u, instruction->GetVectorLength());
237       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
238       switch (instruction->GetReductionKind()) {
239         case HVecReduce::kSum:
240           __ movaps(tmp, src);
241           __ movaps(dst, src);
242           __ punpckhqdq(tmp, tmp);
243           __ paddq(dst, tmp);
244           break;
245         case HVecReduce::kMin:
246         case HVecReduce::kMax:
247           LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
248       }
249       break;
250     }
251     default:
252       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
253       UNREACHABLE();
254   }
255 }
256 
VisitVecCnv(HVecCnv * instruction)257 void LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) {
258   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
259 }
260 
VisitVecCnv(HVecCnv * instruction)261 void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) {
262   LocationSummary* locations = instruction->GetLocations();
263   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
264   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
265   DataType::Type from = instruction->GetInputType();
266   DataType::Type to = instruction->GetResultType();
267   if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
268     DCHECK_EQ(4u, instruction->GetVectorLength());
269     __ cvtdq2ps(dst, src);
270   } else {
271     LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
272   }
273 }
274 
VisitVecNeg(HVecNeg * instruction)275 void LocationsBuilderX86::VisitVecNeg(HVecNeg* instruction) {
276   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
277 }
278 
VisitVecNeg(HVecNeg * instruction)279 void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) {
280   LocationSummary* locations = instruction->GetLocations();
281   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
282   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
283   switch (instruction->GetPackedType()) {
284     case DataType::Type::kUint8:
285     case DataType::Type::kInt8:
286       DCHECK_EQ(16u, instruction->GetVectorLength());
287       __ pxor(dst, dst);
288       __ psubb(dst, src);
289       break;
290     case DataType::Type::kUint16:
291     case DataType::Type::kInt16:
292       DCHECK_EQ(8u, instruction->GetVectorLength());
293       __ pxor(dst, dst);
294       __ psubw(dst, src);
295       break;
296     case DataType::Type::kInt32:
297       DCHECK_EQ(4u, instruction->GetVectorLength());
298       __ pxor(dst, dst);
299       __ psubd(dst, src);
300       break;
301     case DataType::Type::kInt64:
302       DCHECK_EQ(2u, instruction->GetVectorLength());
303       __ pxor(dst, dst);
304       __ psubq(dst, src);
305       break;
306     case DataType::Type::kFloat32:
307       DCHECK_EQ(4u, instruction->GetVectorLength());
308       __ xorps(dst, dst);
309       __ subps(dst, src);
310       break;
311     case DataType::Type::kFloat64:
312       DCHECK_EQ(2u, instruction->GetVectorLength());
313       __ xorpd(dst, dst);
314       __ subpd(dst, src);
315       break;
316     default:
317       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
318       UNREACHABLE();
319   }
320 }
321 
VisitVecAbs(HVecAbs * instruction)322 void LocationsBuilderX86::VisitVecAbs(HVecAbs* instruction) {
323   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
324   // Integral-abs requires a temporary for the comparison.
325   if (instruction->GetPackedType() == DataType::Type::kInt32) {
326     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
327   }
328 }
329 
VisitVecAbs(HVecAbs * instruction)330 void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) {
331   LocationSummary* locations = instruction->GetLocations();
332   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
333   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
334   switch (instruction->GetPackedType()) {
335     case DataType::Type::kInt32: {
336       DCHECK_EQ(4u, instruction->GetVectorLength());
337       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
338       __ movaps(dst, src);
339       __ pxor(tmp, tmp);
340       __ pcmpgtd(tmp, dst);
341       __ pxor(dst, tmp);
342       __ psubd(dst, tmp);
343       break;
344     }
345     case DataType::Type::kFloat32:
346       DCHECK_EQ(4u, instruction->GetVectorLength());
347       __ pcmpeqb(dst, dst);  // all ones
348       __ psrld(dst, Immediate(1));
349       __ andps(dst, src);
350       break;
351     case DataType::Type::kFloat64:
352       DCHECK_EQ(2u, instruction->GetVectorLength());
353       __ pcmpeqb(dst, dst);  // all ones
354       __ psrlq(dst, Immediate(1));
355       __ andpd(dst, src);
356       break;
357     default:
358       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
359       UNREACHABLE();
360   }
361 }
362 
VisitVecNot(HVecNot * instruction)363 void LocationsBuilderX86::VisitVecNot(HVecNot* instruction) {
364   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
365   // Boolean-not requires a temporary to construct the 16 x one.
366   if (instruction->GetPackedType() == DataType::Type::kBool) {
367     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
368   }
369 }
370 
VisitVecNot(HVecNot * instruction)371 void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) {
372   LocationSummary* locations = instruction->GetLocations();
373   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
374   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
375   switch (instruction->GetPackedType()) {
376     case DataType::Type::kBool: {  // special case boolean-not
377       DCHECK_EQ(16u, instruction->GetVectorLength());
378       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
379       __ pxor(dst, dst);
380       __ pcmpeqb(tmp, tmp);  // all ones
381       __ psubb(dst, tmp);  // 16 x one
382       __ pxor(dst, src);
383       break;
384     }
385     case DataType::Type::kUint8:
386     case DataType::Type::kInt8:
387     case DataType::Type::kUint16:
388     case DataType::Type::kInt16:
389     case DataType::Type::kInt32:
390     case DataType::Type::kInt64:
391       DCHECK_LE(2u, instruction->GetVectorLength());
392       DCHECK_LE(instruction->GetVectorLength(), 16u);
393       __ pcmpeqb(dst, dst);  // all ones
394       __ pxor(dst, src);
395       break;
396     case DataType::Type::kFloat32:
397       DCHECK_EQ(4u, instruction->GetVectorLength());
398       __ pcmpeqb(dst, dst);  // all ones
399       __ xorps(dst, src);
400       break;
401     case DataType::Type::kFloat64:
402       DCHECK_EQ(2u, instruction->GetVectorLength());
403       __ pcmpeqb(dst, dst);  // all ones
404       __ xorpd(dst, src);
405       break;
406     default:
407       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
408       UNREACHABLE();
409   }
410 }
411 
412 // Helper to set up locations for vector binary operations.
CreateVecBinOpLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)413 static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
414   LocationSummary* locations = new (allocator) LocationSummary(instruction);
415   switch (instruction->GetPackedType()) {
416     case DataType::Type::kBool:
417     case DataType::Type::kUint8:
418     case DataType::Type::kInt8:
419     case DataType::Type::kUint16:
420     case DataType::Type::kInt16:
421     case DataType::Type::kInt32:
422     case DataType::Type::kInt64:
423     case DataType::Type::kFloat32:
424     case DataType::Type::kFloat64:
425       locations->SetInAt(0, Location::RequiresFpuRegister());
426       locations->SetInAt(1, Location::RequiresFpuRegister());
427       locations->SetOut(Location::SameAsFirstInput());
428       break;
429     default:
430       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
431       UNREACHABLE();
432   }
433 }
434 
CreateVecTerOpLocations(ArenaAllocator * allocator,HVecOperation * instruction)435 static void CreateVecTerOpLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
436   LocationSummary* locations = new (allocator) LocationSummary(instruction);
437   switch (instruction->GetPackedType()) {
438     case DataType::Type::kBool:
439     case DataType::Type::kUint8:
440     case DataType::Type::kInt8:
441     case DataType::Type::kUint16:
442     case DataType::Type::kInt16:
443     case DataType::Type::kInt32:
444     case DataType::Type::kInt64:
445     case DataType::Type::kFloat32:
446     case DataType::Type::kFloat64:
447       locations->SetInAt(0, Location::RequiresFpuRegister());
448       locations->SetInAt(1, Location::RequiresFpuRegister());
449       locations->SetOut(Location::RequiresFpuRegister());
450       break;
451     default:
452       LOG(FATAL) << "Unsupported SIMD type";
453       UNREACHABLE();
454   }
455 }
456 
VisitVecAdd(HVecAdd * instruction)457 void LocationsBuilderX86::VisitVecAdd(HVecAdd* instruction) {
458   if (CpuHasAvxFeatureFlag()) {
459     CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
460   } else {
461     CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
462   }
463 }
464 
VisitVecAdd(HVecAdd * instruction)465 void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) {
466   bool cpu_has_avx = CpuHasAvxFeatureFlag();
467   LocationSummary* locations = instruction->GetLocations();
468   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
469   XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
470   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
471   DCHECK(cpu_has_avx || other_src == dst);
472   switch (instruction->GetPackedType()) {
473     case DataType::Type::kUint8:
474     case DataType::Type::kInt8:
475       DCHECK_EQ(16u, instruction->GetVectorLength());
476       cpu_has_avx ? __ vpaddb(dst, other_src, src) : __ paddb(dst, src);
477       break;
478     case DataType::Type::kUint16:
479     case DataType::Type::kInt16:
480       DCHECK_EQ(8u, instruction->GetVectorLength());
481       cpu_has_avx ? __ vpaddw(dst, other_src, src) : __ paddw(dst, src);
482       break;
483     case DataType::Type::kInt32:
484       DCHECK_EQ(4u, instruction->GetVectorLength());
485       cpu_has_avx ?  __ vpaddd(dst, other_src, src) : __ paddd(dst, src);
486       break;
487     case DataType::Type::kInt64:
488       DCHECK_EQ(2u, instruction->GetVectorLength());
489       cpu_has_avx ? __ vpaddq(dst, other_src, src) : __ paddq(dst, src);
490       break;
491     case DataType::Type::kFloat32:
492       DCHECK_EQ(4u, instruction->GetVectorLength());
493       cpu_has_avx ? __ vaddps(dst, other_src, src) : __ addps(dst, src);
494       break;
495     case DataType::Type::kFloat64:
496       DCHECK_EQ(2u, instruction->GetVectorLength());
497       cpu_has_avx ? __ vaddpd(dst, other_src, src) : __ addpd(dst, src);
498       break;
499     default:
500       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
501       UNREACHABLE();
502   }
503 }
504 
VisitVecSaturationAdd(HVecSaturationAdd * instruction)505 void LocationsBuilderX86::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
506   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
507 }
508 
VisitVecSaturationAdd(HVecSaturationAdd * instruction)509 void InstructionCodeGeneratorX86::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
510   LocationSummary* locations = instruction->GetLocations();
511   DCHECK(locations->InAt(0).Equals(locations->Out()));
512   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
513   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
514   switch (instruction->GetPackedType()) {
515     case DataType::Type::kUint8:
516       DCHECK_EQ(16u, instruction->GetVectorLength());
517       __ paddusb(dst, src);
518       break;
519     case DataType::Type::kInt8:
520       DCHECK_EQ(16u, instruction->GetVectorLength());
521       __ paddsb(dst, src);
522       break;
523     case DataType::Type::kUint16:
524       DCHECK_EQ(8u, instruction->GetVectorLength());
525       __ paddusw(dst, src);
526       break;
527     case DataType::Type::kInt16:
528       DCHECK_EQ(8u, instruction->GetVectorLength());
529       __ paddsw(dst, src);
530       break;
531     default:
532       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
533       UNREACHABLE();
534   }
535 }
536 
VisitVecHalvingAdd(HVecHalvingAdd * instruction)537 void LocationsBuilderX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
538   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
539 }
540 
VisitVecHalvingAdd(HVecHalvingAdd * instruction)541 void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
542   LocationSummary* locations = instruction->GetLocations();
543   DCHECK(locations->InAt(0).Equals(locations->Out()));
544   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
545   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
546 
547   DCHECK(instruction->IsRounded());
548 
549   switch (instruction->GetPackedType()) {
550     case DataType::Type::kUint8:
551       DCHECK_EQ(16u, instruction->GetVectorLength());
552       __ pavgb(dst, src);
553       break;
554     case DataType::Type::kUint16:
555       DCHECK_EQ(8u, instruction->GetVectorLength());
556       __ pavgw(dst, src);
557       break;
558     default:
559       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
560       UNREACHABLE();
561   }
562 }
563 
VisitVecSub(HVecSub * instruction)564 void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) {
565   if (CpuHasAvxFeatureFlag()) {
566     CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
567   } else {
568     CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
569   }
570 }
571 
VisitVecSub(HVecSub * instruction)572 void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) {
573   bool cpu_has_avx = CpuHasAvxFeatureFlag();
574   LocationSummary* locations = instruction->GetLocations();
575   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
576   XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
577   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
578   DCHECK(cpu_has_avx || other_src == dst);
579   switch (instruction->GetPackedType()) {
580     case DataType::Type::kUint8:
581     case DataType::Type::kInt8:
582       DCHECK_EQ(16u, instruction->GetVectorLength());
583       cpu_has_avx ? __ vpsubb(dst, other_src, src) : __ psubb(dst, src);
584       break;
585     case DataType::Type::kUint16:
586     case DataType::Type::kInt16:
587       DCHECK_EQ(8u, instruction->GetVectorLength());
588       cpu_has_avx ? __ vpsubw(dst, other_src, src) : __ psubw(dst, src);
589       break;
590     case DataType::Type::kInt32:
591       DCHECK_EQ(4u, instruction->GetVectorLength());
592       cpu_has_avx ?  __ vpsubd(dst, other_src, src) : __ psubd(dst, src);
593       break;
594     case DataType::Type::kInt64:
595       DCHECK_EQ(2u, instruction->GetVectorLength());
596       cpu_has_avx ? __ vpsubq(dst, other_src, src) : __ psubq(dst, src);
597       break;
598     case DataType::Type::kFloat32:
599       DCHECK_EQ(4u, instruction->GetVectorLength());
600       cpu_has_avx ? __ vsubps(dst, other_src, src) : __ subps(dst, src);
601       break;
602     case DataType::Type::kFloat64:
603       DCHECK_EQ(2u, instruction->GetVectorLength());
604       cpu_has_avx ? __ vsubpd(dst, other_src, src) : __ subpd(dst, src);
605       break;
606     default:
607       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
608       UNREACHABLE();
609   }
610 }
611 
VisitVecSaturationSub(HVecSaturationSub * instruction)612 void LocationsBuilderX86::VisitVecSaturationSub(HVecSaturationSub* instruction) {
613   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
614 }
615 
VisitVecSaturationSub(HVecSaturationSub * instruction)616 void InstructionCodeGeneratorX86::VisitVecSaturationSub(HVecSaturationSub* instruction) {
617   LocationSummary* locations = instruction->GetLocations();
618   DCHECK(locations->InAt(0).Equals(locations->Out()));
619   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
620   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
621   switch (instruction->GetPackedType()) {
622     case DataType::Type::kUint8:
623       DCHECK_EQ(16u, instruction->GetVectorLength());
624       __ psubusb(dst, src);
625       break;
626     case DataType::Type::kInt8:
627       DCHECK_EQ(16u, instruction->GetVectorLength());
628       __ psubsb(dst, src);
629       break;
630     case DataType::Type::kUint16:
631       DCHECK_EQ(8u, instruction->GetVectorLength());
632       __ psubusw(dst, src);
633       break;
634     case DataType::Type::kInt16:
635       DCHECK_EQ(8u, instruction->GetVectorLength());
636       __ psubsw(dst, src);
637       break;
638     default:
639       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
640       UNREACHABLE();
641   }
642 }
643 
VisitVecMul(HVecMul * instruction)644 void LocationsBuilderX86::VisitVecMul(HVecMul* instruction) {
645   if (CpuHasAvxFeatureFlag()) {
646     CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
647   } else {
648     CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
649   }
650 }
651 
VisitVecMul(HVecMul * instruction)652 void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) {
653   bool cpu_has_avx = CpuHasAvxFeatureFlag();
654   LocationSummary* locations = instruction->GetLocations();
655   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
656   XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
657   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
658   DCHECK(cpu_has_avx || other_src == dst);
659   switch (instruction->GetPackedType()) {
660     case DataType::Type::kUint16:
661     case DataType::Type::kInt16:
662       DCHECK_EQ(8u, instruction->GetVectorLength());
663       cpu_has_avx ? __ vpmullw(dst, other_src, src) : __ pmullw(dst, src);
664       break;
665     case DataType::Type::kInt32:
666       DCHECK_EQ(4u, instruction->GetVectorLength());
667       cpu_has_avx ? __ vpmulld(dst, other_src, src) : __ pmulld(dst, src);
668       break;
669     case DataType::Type::kFloat32:
670       DCHECK_EQ(4u, instruction->GetVectorLength());
671       cpu_has_avx ? __ vmulps(dst, other_src, src) : __ mulps(dst, src);
672       break;
673     case DataType::Type::kFloat64:
674       DCHECK_EQ(2u, instruction->GetVectorLength());
675       cpu_has_avx ? __ vmulpd(dst, other_src, src) : __ mulpd(dst, src);
676       break;
677     default:
678       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
679       UNREACHABLE();
680   }
681 }
682 
VisitVecDiv(HVecDiv * instruction)683 void LocationsBuilderX86::VisitVecDiv(HVecDiv* instruction) {
684   if (CpuHasAvxFeatureFlag()) {
685     CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
686   } else {
687     CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
688   }
689 }
690 
VisitVecDiv(HVecDiv * instruction)691 void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) {
692   bool cpu_has_avx = CpuHasAvxFeatureFlag();
693   LocationSummary* locations = instruction->GetLocations();
694   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
695   XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
696   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
697   DCHECK(cpu_has_avx || other_src == dst);
698   switch (instruction->GetPackedType()) {
699     case DataType::Type::kFloat32:
700       DCHECK_EQ(4u, instruction->GetVectorLength());
701       cpu_has_avx ? __ vdivps(dst, other_src, src) : __ divps(dst, src);
702       break;
703     case DataType::Type::kFloat64:
704       DCHECK_EQ(2u, instruction->GetVectorLength());
705       cpu_has_avx ?  __ vdivpd(dst, other_src, src) : __ divpd(dst, src);
706       break;
707     default:
708       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
709       UNREACHABLE();
710   }
711 }
712 
VisitVecMin(HVecMin * instruction)713 void LocationsBuilderX86::VisitVecMin(HVecMin* instruction) {
714   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
715 }
716 
VisitVecMin(HVecMin * instruction)717 void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) {
718   LocationSummary* locations = instruction->GetLocations();
719   DCHECK(locations->InAt(0).Equals(locations->Out()));
720   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
721   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
722   switch (instruction->GetPackedType()) {
723     case DataType::Type::kUint8:
724       DCHECK_EQ(16u, instruction->GetVectorLength());
725       __ pminub(dst, src);
726       break;
727     case DataType::Type::kInt8:
728       DCHECK_EQ(16u, instruction->GetVectorLength());
729       __ pminsb(dst, src);
730       break;
731     case DataType::Type::kUint16:
732       DCHECK_EQ(8u, instruction->GetVectorLength());
733       __ pminuw(dst, src);
734       break;
735     case DataType::Type::kInt16:
736       DCHECK_EQ(8u, instruction->GetVectorLength());
737       __ pminsw(dst, src);
738       break;
739     case DataType::Type::kUint32:
740       DCHECK_EQ(4u, instruction->GetVectorLength());
741       __ pminud(dst, src);
742       break;
743     case DataType::Type::kInt32:
744       DCHECK_EQ(4u, instruction->GetVectorLength());
745       __ pminsd(dst, src);
746       break;
747     // Next cases are sloppy wrt 0.0 vs -0.0.
748     case DataType::Type::kFloat32:
749       DCHECK_EQ(4u, instruction->GetVectorLength());
750       __ minps(dst, src);
751       break;
752     case DataType::Type::kFloat64:
753       DCHECK_EQ(2u, instruction->GetVectorLength());
754       __ minpd(dst, src);
755       break;
756     default:
757       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
758       UNREACHABLE();
759   }
760 }
761 
VisitVecMax(HVecMax * instruction)762 void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) {
763   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
764 }
765 
VisitVecMax(HVecMax * instruction)766 void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) {
767   LocationSummary* locations = instruction->GetLocations();
768   DCHECK(locations->InAt(0).Equals(locations->Out()));
769   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
770   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
771   switch (instruction->GetPackedType()) {
772     case DataType::Type::kUint8:
773       DCHECK_EQ(16u, instruction->GetVectorLength());
774       __ pmaxub(dst, src);
775       break;
776     case DataType::Type::kInt8:
777       DCHECK_EQ(16u, instruction->GetVectorLength());
778       __ pmaxsb(dst, src);
779       break;
780     case DataType::Type::kUint16:
781       DCHECK_EQ(8u, instruction->GetVectorLength());
782       __ pmaxuw(dst, src);
783       break;
784     case DataType::Type::kInt16:
785       DCHECK_EQ(8u, instruction->GetVectorLength());
786       __ pmaxsw(dst, src);
787       break;
788     case DataType::Type::kUint32:
789       DCHECK_EQ(4u, instruction->GetVectorLength());
790       __ pmaxud(dst, src);
791       break;
792     case DataType::Type::kInt32:
793       DCHECK_EQ(4u, instruction->GetVectorLength());
794       __ pmaxsd(dst, src);
795       break;
796     // Next cases are sloppy wrt 0.0 vs -0.0.
797     case DataType::Type::kFloat32:
798       DCHECK_EQ(4u, instruction->GetVectorLength());
799       __ maxps(dst, src);
800       break;
801     case DataType::Type::kFloat64:
802       DCHECK_EQ(2u, instruction->GetVectorLength());
803       __ maxpd(dst, src);
804       break;
805     default:
806       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
807       UNREACHABLE();
808   }
809 }
810 
VisitVecAnd(HVecAnd * instruction)811 void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) {
812   if (CpuHasAvxFeatureFlag()) {
813     CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
814   } else {
815     CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
816   }
817 }
818 
VisitVecAnd(HVecAnd * instruction)819 void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) {
820   bool cpu_has_avx = CpuHasAvxFeatureFlag();
821   LocationSummary* locations = instruction->GetLocations();
822   XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
823   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
824   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
825   DCHECK(cpu_has_avx || other_src == dst);
826   switch (instruction->GetPackedType()) {
827     case DataType::Type::kBool:
828     case DataType::Type::kUint8:
829     case DataType::Type::kInt8:
830     case DataType::Type::kUint16:
831     case DataType::Type::kInt16:
832     case DataType::Type::kInt32:
833     case DataType::Type::kInt64:
834       DCHECK_LE(2u, instruction->GetVectorLength());
835       DCHECK_LE(instruction->GetVectorLength(), 16u);
836       cpu_has_avx ? __ vpand(dst, other_src, src) : __ pand(dst, src);
837       break;
838     case DataType::Type::kFloat32:
839       DCHECK_EQ(4u, instruction->GetVectorLength());
840       cpu_has_avx ? __ vandps(dst, other_src, src) : __ andps(dst, src);
841       break;
842     case DataType::Type::kFloat64:
843       DCHECK_EQ(2u, instruction->GetVectorLength());
844       cpu_has_avx ? __ vandpd(dst, other_src, src) : __ andpd(dst, src);
845       break;
846     default:
847       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
848       UNREACHABLE();
849   }
850 }
851 
VisitVecAndNot(HVecAndNot * instruction)852 void LocationsBuilderX86::VisitVecAndNot(HVecAndNot* instruction) {
853   if (CpuHasAvxFeatureFlag()) {
854     CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
855   } else {
856     CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
857   }
858 }
859 
VisitVecAndNot(HVecAndNot * instruction)860 void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) {
861   bool cpu_has_avx = CpuHasAvxFeatureFlag();
862   LocationSummary* locations = instruction->GetLocations();
863   XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
864   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
865   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
866   DCHECK(cpu_has_avx || other_src == dst);
867   switch (instruction->GetPackedType()) {
868     case DataType::Type::kBool:
869     case DataType::Type::kUint8:
870     case DataType::Type::kInt8:
871     case DataType::Type::kUint16:
872     case DataType::Type::kInt16:
873     case DataType::Type::kInt32:
874     case DataType::Type::kInt64:
875       DCHECK_LE(2u, instruction->GetVectorLength());
876       DCHECK_LE(instruction->GetVectorLength(), 16u);
877       cpu_has_avx ? __ vpandn(dst, other_src, src) : __ pandn(dst, src);
878       break;
879     case DataType::Type::kFloat32:
880       DCHECK_EQ(4u, instruction->GetVectorLength());
881       cpu_has_avx ? __ vandnps(dst, other_src, src) : __ andnps(dst, src);
882       break;
883     case DataType::Type::kFloat64:
884       DCHECK_EQ(2u, instruction->GetVectorLength());
885       cpu_has_avx ? __ vandnpd(dst, other_src, src) : __ andnpd(dst, src);
886       break;
887     default:
888       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
889       UNREACHABLE();
890   }
891 }
892 
VisitVecOr(HVecOr * instruction)893 void LocationsBuilderX86::VisitVecOr(HVecOr* instruction) {
894   if (CpuHasAvxFeatureFlag()) {
895     CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
896   } else {
897     CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
898   }
899 }
900 
VisitVecOr(HVecOr * instruction)901 void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) {
902   bool cpu_has_avx = CpuHasAvxFeatureFlag();
903   LocationSummary* locations = instruction->GetLocations();
904   XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
905   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
906   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
907   DCHECK(cpu_has_avx || other_src == dst);
908   switch (instruction->GetPackedType()) {
909     case DataType::Type::kBool:
910     case DataType::Type::kUint8:
911     case DataType::Type::kInt8:
912     case DataType::Type::kUint16:
913     case DataType::Type::kInt16:
914     case DataType::Type::kInt32:
915     case DataType::Type::kInt64:
916       DCHECK_LE(2u, instruction->GetVectorLength());
917       DCHECK_LE(instruction->GetVectorLength(), 16u);
918       cpu_has_avx ? __ vpor(dst, other_src, src) : __ por(dst, src);
919       break;
920     case DataType::Type::kFloat32:
921       DCHECK_EQ(4u, instruction->GetVectorLength());
922       cpu_has_avx ? __ vorps(dst, other_src, src) : __ orps(dst, src);
923       break;
924     case DataType::Type::kFloat64:
925       DCHECK_EQ(2u, instruction->GetVectorLength());
926       cpu_has_avx ? __ vorpd(dst, other_src, src) : __ orpd(dst, src);
927       break;
928     default:
929       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
930       UNREACHABLE();
931   }
932 }
933 
VisitVecXor(HVecXor * instruction)934 void LocationsBuilderX86::VisitVecXor(HVecXor* instruction) {
935   if (CpuHasAvxFeatureFlag()) {
936     CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
937   } else {
938     CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
939   }
940 }
941 
VisitVecXor(HVecXor * instruction)942 void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) {
943   bool cpu_has_avx = CpuHasAvxFeatureFlag();
944   LocationSummary* locations = instruction->GetLocations();
945   XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
946   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
947   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
948   DCHECK(cpu_has_avx || other_src == dst);
949   switch (instruction->GetPackedType()) {
950     case DataType::Type::kBool:
951     case DataType::Type::kUint8:
952     case DataType::Type::kInt8:
953     case DataType::Type::kUint16:
954     case DataType::Type::kInt16:
955     case DataType::Type::kInt32:
956     case DataType::Type::kInt64:
957       DCHECK_LE(2u, instruction->GetVectorLength());
958       DCHECK_LE(instruction->GetVectorLength(), 16u);
959       cpu_has_avx ? __ vpxor(dst, other_src, src) : __ pxor(dst, src);
960       break;
961     case DataType::Type::kFloat32:
962       DCHECK_EQ(4u, instruction->GetVectorLength());
963       cpu_has_avx ? __ vxorps(dst, other_src, src) : __ xorps(dst, src);
964       break;
965     case DataType::Type::kFloat64:
966       DCHECK_EQ(2u, instruction->GetVectorLength());
967       cpu_has_avx ? __ vxorpd(dst, other_src, src) : __ xorpd(dst, src);
968       break;
969     default:
970       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
971       UNREACHABLE();
972   }
973 }
974 
975 // Helper to set up locations for vector shift operations.
CreateVecShiftLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)976 static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
977   LocationSummary* locations = new (allocator) LocationSummary(instruction);
978   switch (instruction->GetPackedType()) {
979     case DataType::Type::kUint16:
980     case DataType::Type::kInt16:
981     case DataType::Type::kInt32:
982     case DataType::Type::kInt64:
983       locations->SetInAt(0, Location::RequiresFpuRegister());
984       locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
985       locations->SetOut(Location::SameAsFirstInput());
986       break;
987     default:
988       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
989       UNREACHABLE();
990   }
991 }
992 
VisitVecShl(HVecShl * instruction)993 void LocationsBuilderX86::VisitVecShl(HVecShl* instruction) {
994   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
995 }
996 
VisitVecShl(HVecShl * instruction)997 void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) {
998   LocationSummary* locations = instruction->GetLocations();
999   DCHECK(locations->InAt(0).Equals(locations->Out()));
1000   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
1001   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
1002   switch (instruction->GetPackedType()) {
1003     case DataType::Type::kUint16:
1004     case DataType::Type::kInt16:
1005       DCHECK_EQ(8u, instruction->GetVectorLength());
1006       __ psllw(dst, Immediate(static_cast<uint8_t>(value)));
1007       break;
1008     case DataType::Type::kInt32:
1009       DCHECK_EQ(4u, instruction->GetVectorLength());
1010       __ pslld(dst, Immediate(static_cast<uint8_t>(value)));
1011       break;
1012     case DataType::Type::kInt64:
1013       DCHECK_EQ(2u, instruction->GetVectorLength());
1014       __ psllq(dst, Immediate(static_cast<uint8_t>(value)));
1015       break;
1016     default:
1017       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1018       UNREACHABLE();
1019   }
1020 }
1021 
VisitVecShr(HVecShr * instruction)1022 void LocationsBuilderX86::VisitVecShr(HVecShr* instruction) {
1023   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
1024 }
1025 
VisitVecShr(HVecShr * instruction)1026 void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) {
1027   LocationSummary* locations = instruction->GetLocations();
1028   DCHECK(locations->InAt(0).Equals(locations->Out()));
1029   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
1030   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
1031   switch (instruction->GetPackedType()) {
1032     case DataType::Type::kUint16:
1033     case DataType::Type::kInt16:
1034       DCHECK_EQ(8u, instruction->GetVectorLength());
1035       __ psraw(dst, Immediate(static_cast<uint8_t>(value)));
1036       break;
1037     case DataType::Type::kInt32:
1038       DCHECK_EQ(4u, instruction->GetVectorLength());
1039       __ psrad(dst, Immediate(static_cast<uint8_t>(value)));
1040       break;
1041     default:
1042       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1043       UNREACHABLE();
1044   }
1045 }
1046 
VisitVecUShr(HVecUShr * instruction)1047 void LocationsBuilderX86::VisitVecUShr(HVecUShr* instruction) {
1048   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
1049 }
1050 
VisitVecUShr(HVecUShr * instruction)1051 void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) {
1052   LocationSummary* locations = instruction->GetLocations();
1053   DCHECK(locations->InAt(0).Equals(locations->Out()));
1054   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
1055   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
1056   switch (instruction->GetPackedType()) {
1057     case DataType::Type::kUint16:
1058     case DataType::Type::kInt16:
1059       DCHECK_EQ(8u, instruction->GetVectorLength());
1060       __ psrlw(dst, Immediate(static_cast<uint8_t>(value)));
1061       break;
1062     case DataType::Type::kInt32:
1063       DCHECK_EQ(4u, instruction->GetVectorLength());
1064       __ psrld(dst, Immediate(static_cast<uint8_t>(value)));
1065       break;
1066     case DataType::Type::kInt64:
1067       DCHECK_EQ(2u, instruction->GetVectorLength());
1068       __ psrlq(dst, Immediate(static_cast<uint8_t>(value)));
1069       break;
1070     default:
1071       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1072       UNREACHABLE();
1073   }
1074 }
1075 
VisitVecSetScalars(HVecSetScalars * instruction)1076 void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
1077   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
1078 
1079   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
1080 
1081   HInstruction* input = instruction->InputAt(0);
1082   bool is_zero = IsZeroBitPattern(input);
1083 
1084   switch (instruction->GetPackedType()) {
1085     case DataType::Type::kInt64:
1086       // Long needs extra temporary to load from register pairs.
1087       if (!is_zero) {
1088         locations->AddTemp(Location::RequiresFpuRegister());
1089       }
1090       FALLTHROUGH_INTENDED;
1091     case DataType::Type::kBool:
1092     case DataType::Type::kUint8:
1093     case DataType::Type::kInt8:
1094     case DataType::Type::kUint16:
1095     case DataType::Type::kInt16:
1096     case DataType::Type::kInt32:
1097       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
1098                                     : Location::RequiresRegister());
1099       locations->SetOut(Location::RequiresFpuRegister());
1100       break;
1101     case DataType::Type::kFloat32:
1102     case DataType::Type::kFloat64:
1103       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
1104                                     : Location::RequiresFpuRegister());
1105       locations->SetOut(Location::RequiresFpuRegister());
1106       break;
1107     default:
1108       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1109       UNREACHABLE();
1110   }
1111 }
1112 
VisitVecSetScalars(HVecSetScalars * instruction)1113 void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) {
1114   LocationSummary* locations = instruction->GetLocations();
1115   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
1116 
1117   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
1118 
1119   // Zero out all other elements first.
1120   bool cpu_has_avx = CpuHasAvxFeatureFlag();
1121   cpu_has_avx ? __ vxorps(dst, dst, dst) : __ xorps(dst, dst);
1122 
1123   // Shorthand for any type of zero.
1124   if (IsZeroBitPattern(instruction->InputAt(0))) {
1125     return;
1126   }
1127 
1128   // Set required elements.
1129   switch (instruction->GetPackedType()) {
1130     case DataType::Type::kBool:
1131     case DataType::Type::kUint8:
1132     case DataType::Type::kInt8:
1133     case DataType::Type::kUint16:
1134     case DataType::Type::kInt16:  // TODO: up to here, and?
1135       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1136       UNREACHABLE();
1137     case DataType::Type::kInt32:
1138       DCHECK_EQ(4u, instruction->GetVectorLength());
1139       __ movd(dst, locations->InAt(0).AsRegister<Register>());
1140       break;
1141     case DataType::Type::kInt64: {
1142       DCHECK_EQ(2u, instruction->GetVectorLength());
1143       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1144       __ xorps(tmp, tmp);
1145       __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
1146       __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
1147       __ punpckldq(dst, tmp);
1148       break;
1149     }
1150     case DataType::Type::kFloat32:
1151       DCHECK_EQ(4u, instruction->GetVectorLength());
1152       __ movss(dst, locations->InAt(1).AsFpuRegister<XmmRegister>());
1153       break;
1154     case DataType::Type::kFloat64:
1155       DCHECK_EQ(2u, instruction->GetVectorLength());
1156       __ movsd(dst, locations->InAt(1).AsFpuRegister<XmmRegister>());
1157       break;
1158     default:
1159       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1160       UNREACHABLE();
1161   }
1162 }
1163 
1164 // Helper to set up locations for vector accumulations.
CreateVecAccumLocations(ArenaAllocator * allocator,HVecOperation * instruction)1165 static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
1166   LocationSummary* locations = new (allocator) LocationSummary(instruction);
1167   switch (instruction->GetPackedType()) {
1168     case DataType::Type::kUint8:
1169     case DataType::Type::kInt8:
1170     case DataType::Type::kUint16:
1171     case DataType::Type::kInt16:
1172     case DataType::Type::kInt32:
1173     case DataType::Type::kInt64:
1174       locations->SetInAt(0, Location::RequiresFpuRegister());
1175       locations->SetInAt(1, Location::RequiresFpuRegister());
1176       locations->SetInAt(2, Location::RequiresFpuRegister());
1177       locations->SetOut(Location::SameAsFirstInput());
1178       break;
1179     default:
1180       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1181       UNREACHABLE();
1182   }
1183 }
1184 
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1185 void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1186   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1187 }
1188 
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1189 void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1190   // TODO: pmaddwd?
1191   LOG(FATAL) << "No SIMD for " << instruction->GetId();
1192 }
1193 
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1194 void LocationsBuilderX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1195   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1196 }
1197 
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1198 void InstructionCodeGeneratorX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1199   // TODO: psadbw for unsigned?
1200   LOG(FATAL) << "No SIMD for " << instruction->GetId();
1201 }
1202 
VisitVecDotProd(HVecDotProd * instruction)1203 void LocationsBuilderX86::VisitVecDotProd(HVecDotProd* instruction) {
1204   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
1205   locations->SetInAt(0, Location::RequiresFpuRegister());
1206   locations->SetInAt(1, Location::RequiresFpuRegister());
1207   locations->SetInAt(2, Location::RequiresFpuRegister());
1208   locations->SetOut(Location::SameAsFirstInput());
1209   locations->AddTemp(Location::RequiresFpuRegister());
1210 }
1211 
VisitVecDotProd(HVecDotProd * instruction)1212 void InstructionCodeGeneratorX86::VisitVecDotProd(HVecDotProd* instruction) {
1213   bool cpu_has_avx = CpuHasAvxFeatureFlag();
1214   LocationSummary* locations = instruction->GetLocations();
1215   XmmRegister acc = locations->InAt(0).AsFpuRegister<XmmRegister>();
1216   XmmRegister left = locations->InAt(1).AsFpuRegister<XmmRegister>();
1217   XmmRegister right = locations->InAt(2).AsFpuRegister<XmmRegister>();
1218   switch (instruction->GetPackedType()) {
1219     case DataType::Type::kInt32: {
1220       DCHECK_EQ(4u, instruction->GetVectorLength());
1221       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1222       if (!cpu_has_avx) {
1223         __ movaps(tmp, right);
1224         __ pmaddwd(tmp, left);
1225         __ paddd(acc, tmp);
1226       } else {
1227         __ vpmaddwd(tmp, left, right);
1228         __ vpaddd(acc, acc, tmp);
1229       }
1230       break;
1231     }
1232     default:
1233       LOG(FATAL) << "Unsupported SIMD Type" << instruction->GetPackedType();
1234       UNREACHABLE();
1235   }
1236 }
1237 
1238 // Helper to set up locations for vector memory operations.
CreateVecMemLocations(ArenaAllocator * allocator,HVecMemoryOperation * instruction,bool is_load)1239 static void CreateVecMemLocations(ArenaAllocator* allocator,
1240                                   HVecMemoryOperation* instruction,
1241                                   bool is_load) {
1242   LocationSummary* locations = new (allocator) LocationSummary(instruction);
1243   switch (instruction->GetPackedType()) {
1244     case DataType::Type::kBool:
1245     case DataType::Type::kUint8:
1246     case DataType::Type::kInt8:
1247     case DataType::Type::kUint16:
1248     case DataType::Type::kInt16:
1249     case DataType::Type::kInt32:
1250     case DataType::Type::kInt64:
1251     case DataType::Type::kFloat32:
1252     case DataType::Type::kFloat64:
1253       locations->SetInAt(0, Location::RequiresRegister());
1254       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
1255       if (is_load) {
1256         locations->SetOut(Location::RequiresFpuRegister());
1257       } else {
1258         locations->SetInAt(2, Location::RequiresFpuRegister());
1259       }
1260       break;
1261     default:
1262       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1263       UNREACHABLE();
1264   }
1265 }
1266 
1267 // Helper to construct address for vector memory operations.
VecAddress(LocationSummary * locations,size_t size,bool is_string_char_at)1268 static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
1269   Location base = locations->InAt(0);
1270   Location index = locations->InAt(1);
1271   ScaleFactor scale = TIMES_1;
1272   switch (size) {
1273     case 2: scale = TIMES_2; break;
1274     case 4: scale = TIMES_4; break;
1275     case 8: scale = TIMES_8; break;
1276     default: break;
1277   }
1278   // Incorporate the string or array offset in the address computation.
1279   uint32_t offset = is_string_char_at
1280       ? mirror::String::ValueOffset().Uint32Value()
1281       : mirror::Array::DataOffset(size).Uint32Value();
1282   return CodeGeneratorX86::ArrayAddress(base.AsRegister<Register>(), index, scale, offset);
1283 }
1284 
VisitVecLoad(HVecLoad * instruction)1285 void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) {
1286   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
1287   // String load requires a temporary for the compressed load.
1288   if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
1289     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
1290   }
1291 }
1292 
VisitVecLoad(HVecLoad * instruction)1293 void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
1294   LocationSummary* locations = instruction->GetLocations();
1295   size_t size = DataType::Size(instruction->GetPackedType());
1296   Address address = VecAddress(locations, size, instruction->IsStringCharAt());
1297   XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
1298   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
1299   switch (instruction->GetPackedType()) {
1300     case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
1301     case DataType::Type::kUint16:
1302       DCHECK_EQ(8u, instruction->GetVectorLength());
1303       // Special handling of compressed/uncompressed string load.
1304       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
1305         NearLabel done, not_compressed;
1306         XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1307         // Test compression bit.
1308         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1309                       "Expecting 0=compressed, 1=uncompressed");
1310         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1311         __ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1));
1312         __ j(kNotZero, &not_compressed);
1313         // Zero extend 8 compressed bytes into 8 chars.
1314         __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
1315         __ pxor(tmp, tmp);
1316         __ punpcklbw(reg, tmp);
1317         __ jmp(&done);
1318         // Load 4 direct uncompressed chars.
1319         __ Bind(&not_compressed);
1320         is_aligned16 ?  __ movdqa(reg, address) :  __ movdqu(reg, address);
1321         __ Bind(&done);
1322         return;
1323       }
1324       FALLTHROUGH_INTENDED;
1325     case DataType::Type::kBool:
1326     case DataType::Type::kUint8:
1327     case DataType::Type::kInt8:
1328     case DataType::Type::kInt32:
1329     case DataType::Type::kInt64:
1330       DCHECK_LE(2u, instruction->GetVectorLength());
1331       DCHECK_LE(instruction->GetVectorLength(), 16u);
1332       is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
1333       break;
1334     case DataType::Type::kFloat32:
1335       DCHECK_EQ(4u, instruction->GetVectorLength());
1336       is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
1337       break;
1338     case DataType::Type::kFloat64:
1339       DCHECK_EQ(2u, instruction->GetVectorLength());
1340       is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
1341       break;
1342     default:
1343       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1344       UNREACHABLE();
1345   }
1346 }
1347 
VisitVecStore(HVecStore * instruction)1348 void LocationsBuilderX86::VisitVecStore(HVecStore* instruction) {
1349   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
1350 }
1351 
VisitVecStore(HVecStore * instruction)1352 void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) {
1353   LocationSummary* locations = instruction->GetLocations();
1354   size_t size = DataType::Size(instruction->GetPackedType());
1355   Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
1356   XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
1357   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
1358   switch (instruction->GetPackedType()) {
1359     case DataType::Type::kBool:
1360     case DataType::Type::kUint8:
1361     case DataType::Type::kInt8:
1362     case DataType::Type::kUint16:
1363     case DataType::Type::kInt16:
1364     case DataType::Type::kInt32:
1365     case DataType::Type::kInt64:
1366       DCHECK_LE(2u, instruction->GetVectorLength());
1367       DCHECK_LE(instruction->GetVectorLength(), 16u);
1368       is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
1369       break;
1370     case DataType::Type::kFloat32:
1371       DCHECK_EQ(4u, instruction->GetVectorLength());
1372       is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
1373       break;
1374     case DataType::Type::kFloat64:
1375       DCHECK_EQ(2u, instruction->GetVectorLength());
1376       is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
1377       break;
1378     default:
1379       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1380       UNREACHABLE();
1381   }
1382 }
1383 
1384 #undef __
1385 
1386 }  // namespace x86
1387 }  // namespace art
1388